1 #include <stdint.h>
2 #include <math.h>
3 #include <float.h>
4 #include "libm.h"
5
6 #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
sqrtl(long double x)7 long double sqrtl(long double x)
8 {
9 return sqrt(x);
10 }
11 #elif (LDBL_MANT_DIG == 113 || LDBL_MANT_DIG == 64) && LDBL_MAX_EXP == 16384
12 #include "sqrt_data.h"
13
14 #define FENV_SUPPORT 1
15
16 typedef struct {
17 uint64_t hi;
18 uint64_t lo;
19 } u128;
20
21 /* top: 16 bit sign+exponent, x: significand. */
mkldbl(uint64_t top,u128 x)22 static inline long double mkldbl(uint64_t top, u128 x)
23 {
24 union ldshape u;
25 #if LDBL_MANT_DIG == 113
26 u.i2.hi = x.hi;
27 u.i2.lo = x.lo;
28 u.i2.hi &= 0x0000ffffffffffff;
29 u.i2.hi |= top << 48;
30 #elif LDBL_MANT_DIG == 64
31 u.i.se = top;
32 u.i.m = x.lo;
33 /* force the top bit on non-zero (and non-subnormal) results. */
34 if (top & 0x7fff)
35 u.i.m |= 0x8000000000000000;
36 #endif
37 return u.f;
38 }
39
40 /* return: top 16 bit is sign+exp and following bits are the significand. */
asu128(long double x)41 static inline u128 asu128(long double x)
42 {
43 union ldshape u = {.f=x};
44 u128 r;
45 #if LDBL_MANT_DIG == 113
46 r.hi = u.i2.hi;
47 r.lo = u.i2.lo;
48 #elif LDBL_MANT_DIG == 64
49 r.lo = u.i.m<<49;
50 /* ignore the top bit: pseudo numbers are not handled. */
51 r.hi = u.i.m>>15;
52 r.hi &= 0x0000ffffffffffff;
53 r.hi |= (uint64_t)u.i.se << 48;
54 #endif
55 return r;
56 }
57
58 /* returns a*b*2^-32 - e, with error 0 <= e < 1. */
mul32(uint32_t a,uint32_t b)59 static inline uint32_t mul32(uint32_t a, uint32_t b)
60 {
61 return (uint64_t)a*b >> 32;
62 }
63
64 /* returns a*b*2^-64 - e, with error 0 <= e < 3. */
mul64(uint64_t a,uint64_t b)65 static inline uint64_t mul64(uint64_t a, uint64_t b)
66 {
67 uint64_t ahi = a>>32;
68 uint64_t alo = a&0xffffffff;
69 uint64_t bhi = b>>32;
70 uint64_t blo = b&0xffffffff;
71 return ahi*bhi + (ahi*blo >> 32) + (alo*bhi >> 32);
72 }
73
add64(u128 a,uint64_t b)74 static inline u128 add64(u128 a, uint64_t b)
75 {
76 u128 r;
77 r.lo = a.lo + b;
78 r.hi = a.hi;
79 if (r.lo < a.lo)
80 r.hi++;
81 return r;
82 }
83
add128(u128 a,u128 b)84 static inline u128 add128(u128 a, u128 b)
85 {
86 u128 r;
87 r.lo = a.lo + b.lo;
88 r.hi = a.hi + b.hi;
89 if (r.lo < a.lo)
90 r.hi++;
91 return r;
92 }
93
sub64(u128 a,uint64_t b)94 static inline u128 sub64(u128 a, uint64_t b)
95 {
96 u128 r;
97 r.lo = a.lo - b;
98 r.hi = a.hi;
99 if (a.lo < b)
100 r.hi--;
101 return r;
102 }
103
sub128(u128 a,u128 b)104 static inline u128 sub128(u128 a, u128 b)
105 {
106 u128 r;
107 r.lo = a.lo - b.lo;
108 r.hi = a.hi - b.hi;
109 if (a.lo < b.lo)
110 r.hi--;
111 return r;
112 }
113
114 /* a<<n, 0 <= n <= 127 */
lsh(u128 a,int n)115 static inline u128 lsh(u128 a, int n)
116 {
117 if (n == 0)
118 return a;
119 if (n >= 64) {
120 a.hi = a.lo<<(n-64);
121 a.lo = 0;
122 } else {
123 a.hi = (a.hi<<n) | (a.lo>>(64-n));
124 a.lo = a.lo<<n;
125 }
126 return a;
127 }
128
129 /* a>>n, 0 <= n <= 127 */
rsh(u128 a,int n)130 static inline u128 rsh(u128 a, int n)
131 {
132 if (n == 0)
133 return a;
134 if (n >= 64) {
135 a.lo = a.hi>>(n-64);
136 a.hi = 0;
137 } else {
138 a.lo = (a.lo>>n) | (a.hi<<(64-n));
139 a.hi = a.hi>>n;
140 }
141 return a;
142 }
143
144 /* returns a*b exactly. */
mul64_128(uint64_t a,uint64_t b)145 static inline u128 mul64_128(uint64_t a, uint64_t b)
146 {
147 u128 r;
148 uint64_t ahi = a>>32;
149 uint64_t alo = a&0xffffffff;
150 uint64_t bhi = b>>32;
151 uint64_t blo = b&0xffffffff;
152 uint64_t lo1 = ((ahi*blo)&0xffffffff) + ((alo*bhi)&0xffffffff) + (alo*blo>>32);
153 uint64_t lo2 = (alo*blo)&0xffffffff;
154 r.hi = ahi*bhi + (ahi*blo>>32) + (alo*bhi>>32) + (lo1>>32);
155 r.lo = (lo1<<32) + lo2;
156 return r;
157 }
158
159 /* returns a*b*2^-128 - e, with error 0 <= e < 7. */
mul128(u128 a,u128 b)160 static inline u128 mul128(u128 a, u128 b)
161 {
162 u128 hi = mul64_128(a.hi, b.hi);
163 uint64_t m1 = mul64(a.hi, b.lo);
164 uint64_t m2 = mul64(a.lo, b.hi);
165 return add64(add64(hi, m1), m2);
166 }
167
168 /* returns a*b % 2^128. */
mul128_tail(u128 a,u128 b)169 static inline u128 mul128_tail(u128 a, u128 b)
170 {
171 u128 lo = mul64_128(a.lo, b.lo);
172 lo.hi += a.hi*b.lo + a.lo*b.hi;
173 return lo;
174 }
175
176
177 /* see sqrt.c for detailed comments. */
178
sqrtl(long double x)179 long double sqrtl(long double x)
180 {
181 u128 ix, ml;
182 uint64_t top;
183
184 ix = asu128(x);
185 top = ix.hi >> 48;
186 if (predict_false(top - 0x0001 >= 0x7fff - 0x0001)) {
187 /* x < 0x1p-16382 or inf or nan. */
188 if (2*ix.hi == 0 && ix.lo == 0)
189 return x;
190 if (ix.hi == 0x7fff000000000000 && ix.lo == 0)
191 return x;
192 if (top >= 0x7fff)
193 return __math_invalidl(x);
194 /* x is subnormal, normalize it. */
195 ix = asu128(x * 0x1p112);
196 top = ix.hi >> 48;
197 top -= 112;
198 }
199
200 /* x = 4^e m; with int e and m in [1, 4) */
201 int even = top & 1;
202 ml = lsh(ix, 15);
203 ml.hi |= 0x8000000000000000;
204 if (even) ml = rsh(ml, 1);
205 top = (top + 0x3fff) >> 1;
206
207 /* r ~ 1/sqrt(m) */
208 static const uint64_t three = 0xc0000000;
209 uint64_t r, s, d, u, i;
210 i = (ix.hi >> 42) % 128;
211 r = (uint32_t)__rsqrt_tab[i] << 16;
212 /* |r sqrt(m) - 1| < 0x1p-8 */
213 s = mul32(ml.hi>>32, r);
214 d = mul32(s, r);
215 u = three - d;
216 r = mul32(u, r) << 1;
217 /* |r sqrt(m) - 1| < 0x1.7bp-16, switch to 64bit */
218 r = r<<32;
219 s = mul64(ml.hi, r);
220 d = mul64(s, r);
221 u = (three<<32) - d;
222 r = mul64(u, r) << 1;
223 /* |r sqrt(m) - 1| < 0x1.a5p-31 */
224 s = mul64(u, s) << 1;
225 d = mul64(s, r);
226 u = (three<<32) - d;
227 r = mul64(u, r) << 1;
228 /* |r sqrt(m) - 1| < 0x1.c001p-59, switch to 128bit */
229
230 static const u128 threel = {.hi=three<<32, .lo=0};
231 u128 rl, sl, dl, ul;
232 rl.hi = r;
233 rl.lo = 0;
234 sl = mul128(ml, rl);
235 dl = mul128(sl, rl);
236 ul = sub128(threel, dl);
237 sl = mul128(ul, sl); /* repr: 3.125 */
238 /* -0x1p-116 < s - sqrt(m) < 0x3.8001p-125 */
239 sl = rsh(sub64(sl, 4), 125-(LDBL_MANT_DIG-1));
240 /* s < sqrt(m) < s + 1 ULP + tiny */
241
242 long double y;
243 u128 d2, d1, d0;
244 d0 = sub128(lsh(ml, 2*(LDBL_MANT_DIG-1)-126), mul128_tail(sl,sl));
245 d1 = sub128(sl, d0);
246 d2 = add128(add64(sl, 1), d1);
247 sl = add64(sl, d1.hi >> 63);
248 y = mkldbl(top, sl);
249 if (FENV_SUPPORT) {
250 /* handle rounding modes and inexact exception. */
251 top = predict_false((d2.hi|d2.lo)==0) ? 0 : 1;
252 top |= ((d1.hi^d2.hi)&0x8000000000000000) >> 48;
253 y += mkldbl(top, (u128){0});
254 }
255 return y;
256 }
257 #else
258 #error unsupported long double format
259 #endif
260