1 /* Copyright JS Foundation and other contributors, http://js.foundation
2 *
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 *
15 * This file is based on work under the following copyright and permission
16 * notice:
17 *
18 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
19 *
20 * Developed at SunSoft, a Sun Microsystems, Inc. business.
21 * Permission to use, copy, modify, and distribute this
22 * software is freely granted, provided that this notice
23 * is preserved.
24 *
25 * @(#)e_sqrt.c 1.3 95/01/18
26 */
27
28 #include "jerry-libm-internal.h"
29
30 /* sqrt(x)
31 * Return correctly rounded sqrt.
32 *
33 * ------------------------------------------
34 * | Use the hardware sqrt if you have one |
35 * ------------------------------------------
36 *
37 * Method:
38 * Bit by bit method using integer arithmetic. (Slow, but portable)
39 * 1. Normalization
40 * Scale x to y in [1,4) with even powers of 2:
41 * find an integer k such that 1 <= (y=x*2^(2k)) < 4, then
42 * sqrt(x) = 2^k * sqrt(y)
43 * 2. Bit by bit computation
44 * Let q = sqrt(y) truncated to i bit after binary point (q = 1),
45 * i 0
46 * i+1 2
47 * s = 2*q , and y = 2 * ( y - q ). (1)
48 * i i i i
49 *
50 * To compute q from q , one checks whether
51 * i+1 i
52 *
53 * -(i+1) 2
54 * (q + 2 ) <= y. (2)
55 * i
56 * -(i+1)
57 * If (2) is false, then q = q ; otherwise q = q + 2 .
58 * i+1 i i+1 i
59 *
60 * With some algebric manipulation, it is not difficult to see
61 * that (2) is equivalent to
62 * -(i+1)
63 * s + 2 <= y (3)
64 * i i
65 *
66 * The advantage of (3) is that s and y can be computed by
67 * i i
68 * the following recurrence formula:
69 * if (3) is false
70 *
71 * s = s , y = y ; (4)
72 * i+1 i i+1 i
73 *
74 * otherwise,
75 * -i -(i+1)
76 * s = s + 2 , y = y - s - 2 (5)
77 * i+1 i i+1 i i
78 *
79 * One may easily use induction to prove (4) and (5).
80 * Note. Since the left hand side of (3) contain only i+2 bits,
81 * it does not necessary to do a full (53-bit) comparison
82 * in (3).
83 * 3. Final rounding
84 * After generating the 53 bits result, we compute one more bit.
85 * Together with the remainder, we can decide whether the
86 * result is exact, bigger than 1/2ulp, or less than 1/2ulp
87 * (it will never equal to 1/2ulp).
88 * The rounding mode can be detected by checking whether
89 * huge + tiny is equal to huge, and whether huge - tiny is
90 * equal to huge for some floating point number "huge" and "tiny".
91 *
92 * Special cases:
93 * sqrt(+-0) = +-0 ... exact
94 * sqrt(inf) = inf
95 * sqrt(-ve) = NaN ... with invalid signal
96 * sqrt(NaN) = NaN ... with invalid signal for signaling NaN
97 *
98 * Other methods: see the appended file at the end of the program below.
99 */
100
101 #define one 1.0
102 #define tiny 1.0e-300
103
104 double
sqrt(double x)105 sqrt (double x)
106 {
107 int sign = (int) 0x80000000;
108 unsigned r, t1, s1, ix1, q1;
109 int ix0, s0, q, m, t, i;
110
111 ix0 = __HI (x); /* high word of x */
112 ix1 = __LO (x); /* low word of x */
113
114 /* take care of Inf and NaN */
115 if ((ix0 & 0x7ff00000) == 0x7ff00000)
116 {
117 return x * x + x; /* sqrt(NaN) = NaN, sqrt(+inf) = +inf, sqrt(-inf) = sNaN */
118 }
119 /* take care of zero */
120 if (ix0 <= 0)
121 {
122 if (((ix0 & (~sign)) | ix1) == 0) /* sqrt(+-0) = +-0 */
123 {
124 return x;
125 }
126 else if (ix0 < 0) /* sqrt(-ve) = sNaN */
127 {
128 return NAN;
129 }
130 }
131 /* normalize x */
132 m = (ix0 >> 20);
133 if (m == 0) /* subnormal x */
134 {
135 while (ix0 == 0)
136 {
137 m -= 21;
138 ix0 |= (ix1 >> 11);
139 ix1 <<= 21;
140 }
141 for (i = 0; (ix0 & 0x00100000) == 0; i++)
142 {
143 ix0 <<= 1;
144 }
145 m -= i - 1;
146 ix0 |= (ix1 >> (32 - i));
147 ix1 <<= i;
148 }
149 m -= 1023; /* unbias exponent */
150 ix0 = (ix0 & 0x000fffff) | 0x00100000;
151 if (m & 1) /* odd m, double x to make it even */
152 {
153 ix0 += ix0 + ((ix1 & sign) >> 31);
154 ix1 += ix1;
155 }
156 m >>= 1; /* m = [m / 2] */
157
158 /* generate sqrt(x) bit by bit */
159 ix0 += ix0 + ((ix1 & sign) >> 31);
160 ix1 += ix1;
161 q = q1 = s0 = s1 = 0; /* [q,q1] = sqrt(x) */
162 r = 0x00200000; /* r = moving bit from right to left */
163
164 while (r != 0)
165 {
166 t = s0 + r;
167 if (t <= ix0)
168 {
169 s0 = t + r;
170 ix0 -= t;
171 q += r;
172 }
173 ix0 += ix0 + ((ix1 & sign) >> 31);
174 ix1 += ix1;
175 r >>= 1;
176 }
177
178 r = sign;
179 while (r != 0)
180 {
181 t1 = s1 + r;
182 t = s0;
183 if ((t < ix0) || ((t == ix0) && (t1 <= ix1)))
184 {
185 s1 = t1 + r;
186 if (((t1 & sign) == sign) && (s1 & sign) == 0)
187 {
188 s0 += 1;
189 }
190 ix0 -= t;
191 if (ix1 < t1)
192 {
193 ix0 -= 1;
194 }
195 ix1 -= t1;
196 q1 += r;
197 }
198 ix0 += ix0 + ((ix1 & sign) >> 31);
199 ix1 += ix1;
200 r >>= 1;
201 }
202
203 double_accessor ret;
204
205 /* use floating add to find out rounding direction */
206 if ((ix0 | ix1) != 0)
207 {
208 ret.dbl = one - tiny; /* trigger inexact flag */
209 if (ret.dbl >= one)
210 {
211 ret.dbl = one + tiny;
212 if (q1 == (unsigned) 0xffffffff)
213 {
214 q1 = 0;
215 q += 1;
216 }
217 else if (ret.dbl > one)
218 {
219 if (q1 == (unsigned) 0xfffffffe)
220 {
221 q += 1;
222 }
223 q1 += 2;
224 }
225 else
226 {
227 q1 += (q1 & 1);
228 }
229 }
230 }
231 ix0 = (q >> 1) + 0x3fe00000;
232 ix1 = q1 >> 1;
233 if ((q & 1) == 1)
234 {
235 ix1 |= sign;
236 }
237 ix0 += (m << 20);
238 ret.as_int.hi = ix0;
239 ret.as_int.lo = ix1;
240 return ret.dbl;
241 } /* sqrt */
242
243 #undef one
244 #undef tiny
245
246 /*
247 Other methods (use floating-point arithmetic)
248 -------------
249 (This is a copy of a drafted paper by Prof W. Kahan
250 and K.C. Ng, written in May, 1986)
251
252 Two algorithms are given here to implement sqrt(x)
253 (IEEE double precision arithmetic) in software.
254 Both supply sqrt(x) correctly rounded. The first algorithm (in
255 Section A) uses newton iterations and involves four divisions.
256 The second one uses reciproot iterations to avoid division, but
257 requires more multiplications. Both algorithms need the ability
258 to chop results of arithmetic operations instead of round them,
259 and the INEXACT flag to indicate when an arithmetic operation
260 is executed exactly with no roundoff error, all part of the
261 standard (IEEE 754-1985). The ability to perform shift, add,
262 subtract and logical AND operations upon 32-bit words is needed
263 too, though not part of the standard.
264
265 A. sqrt(x) by Newton Iteration
266
267 (1) Initial approximation
268
269 Let x0 and x1 be the leading and the trailing 32-bit words of
270 a floating point number x (in IEEE double format) respectively
271
272 1 11 52 ...widths
273 ------------------------------------------------------
274 x: |s| e | f |
275 ------------------------------------------------------
276 msb lsb msb lsb ...order
277
278 ------------------------ ------------------------
279 x0: |s| e | f1 | x1: | f2 |
280 ------------------------ ------------------------
281
282 By performing shifts and subtracts on x0 and x1 (both regarded
283 as integers), we obtain an 8-bit approximation of sqrt(x) as
284 follows.
285
286 k := (x0>>1) + 0x1ff80000;
287 y0 := k - T1[31&(k>>15)]. ... y ~ sqrt(x) to 8 bits
288 Here k is a 32-bit integer and T1[] is an integer array containing
289 correction terms. Now magically the floating value of y (y's
290 leading 32-bit word is y0, the value of its trailing word is 0)
291 approximates sqrt(x) to almost 8-bit.
292
293 Value of T1:
294 static int T1[32]= {
295 0, 1024, 3062, 5746, 9193, 13348, 18162, 23592,
296 29598, 36145, 43202, 50740, 58733, 67158, 75992, 85215,
297 83599, 71378, 60428, 50647, 41945, 34246, 27478, 21581,
298 16499, 12183, 8588, 5674, 3403, 1742, 661, 130,};
299
300 (2) Iterative refinement
301
302 Apply Heron's rule three times to y, we have y approximates
303 sqrt(x) to within 1 ulp (Unit in the Last Place):
304
305 y := (y+x/y)/2 ... almost 17 sig. bits
306 y := (y+x/y)/2 ... almost 35 sig. bits
307 y := y-(y-x/y)/2 ... within 1 ulp
308
309 Remark 1.
310 Another way to improve y to within 1 ulp is:
311
312 y := (y+x/y) ... almost 17 sig. bits to 2*sqrt(x)
313 y := y - 0x00100006 ... almost 18 sig. bits to sqrt(x)
314
315 2
316 (x-y )*y
317 y := y + 2* ---------- ...within 1 ulp
318 2
319 3y + x
320
321 This formula has one division fewer than the one above; however,
322 it requires more multiplications and additions. Also x must be
323 scaled in advance to avoid spurious overflow in evaluating the
324 expression 3y*y+x. Hence it is not recommended uless division
325 is slow. If division is very slow, then one should use the
326 reciproot algorithm given in section B.
327
328 (3) Final adjustment
329
330 By twiddling y's last bit it is possible to force y to be
331 correctly rounded according to the prevailing rounding mode
332 as follows. Let r and i be copies of the rounding mode and
333 inexact flag before entering the square root program. Also we
334 use the expression y+-ulp for the next representable floating
335 numbers (up and down) of y. Note that y+-ulp = either fixed
336 point y+-1, or multiply y by nextafter(1,+-inf) in chopped
337 mode.
338
339 I := FALSE; ... reset INEXACT flag I
340 R := RZ; ... set rounding mode to round-toward-zero
341 z := x/y; ... chopped quotient, possibly inexact
342 If(not I) then { ... if the quotient is exact
343 if(z=y) {
344 I := i; ... restore inexact flag
345 R := r; ... restore rounded mode
346 return sqrt(x):=y.
347 } else {
348 z := z - ulp; ... special rounding
349 }
350 }
351 i := TRUE; ... sqrt(x) is inexact
352 If (r=RN) then z=z+ulp ... rounded-to-nearest
353 If (r=RP) then { ... round-toward-+inf
354 y = y+ulp; z=z+ulp;
355 }
356 y := y+z; ... chopped sum
357 y0:=y0-0x00100000; ... y := y/2 is correctly rounded.
358 I := i; ... restore inexact flag
359 R := r; ... restore rounded mode
360 return sqrt(x):=y.
361
362 (4) Special cases
363
364 Square root of +inf, +-0, or NaN is itself;
365 Square root of a negative number is NaN with invalid signal.
366
367 B. sqrt(x) by Reciproot Iteration
368
369 (1) Initial approximation
370
371 Let x0 and x1 be the leading and the trailing 32-bit words of
372 a floating point number x (in IEEE double format) respectively
373 (see section A). By performing shifs and subtracts on x0 and y0,
374 we obtain a 7.8-bit approximation of 1/sqrt(x) as follows.
375
376 k := 0x5fe80000 - (x0>>1);
377 y0:= k - T2[63&(k>>14)]. ... y ~ 1/sqrt(x) to 7.8 bits
378
379 Here k is a 32-bit integer and T2[] is an integer array
380 containing correction terms. Now magically the floating
381 value of y (y's leading 32-bit word is y0, the value of
382 its trailing word y1 is set to zero) approximates 1/sqrt(x)
383 to almost 7.8-bit.
384
385 Value of T2:
386 static int T2[64]= {
387 0x1500, 0x2ef8, 0x4d67, 0x6b02, 0x87be, 0xa395, 0xbe7a, 0xd866,
388 0xf14a, 0x1091b,0x11fcd,0x13552,0x14999,0x15c98,0x16e34,0x17e5f,
389 0x18d03,0x19a01,0x1a545,0x1ae8a,0x1b5c4,0x1bb01,0x1bfde,0x1c28d,
390 0x1c2de,0x1c0db,0x1ba73,0x1b11c,0x1a4b5,0x1953d,0x18266,0x16be0,
391 0x1683e,0x179d8,0x18a4d,0x19992,0x1a789,0x1b445,0x1bf61,0x1c989,
392 0x1d16d,0x1d77b,0x1dddf,0x1e2ad,0x1e5bf,0x1e6e8,0x1e654,0x1e3cd,
393 0x1df2a,0x1d635,0x1cb16,0x1be2c,0x1ae4e,0x19bde,0x1868e,0x16e2e,
394 0x1527f,0x1334a,0x11051,0xe951, 0xbe01, 0x8e0d, 0x5924, 0x1edd,};
395
396 (2) Iterative refinement
397
398 Apply Reciproot iteration three times to y and multiply the
399 result by x to get an approximation z that matches sqrt(x)
400 to about 1 ulp. To be exact, we will have
401 -1ulp < sqrt(x)-z<1.0625ulp.
402
403 ... set rounding mode to Round-to-nearest
404 y := y*(1.5-0.5*x*y*y) ... almost 15 sig. bits to 1/sqrt(x)
405 y := y*((1.5-2^-30)+0.5*x*y*y)... about 29 sig. bits to 1/sqrt(x)
406 ... special arrangement for better accuracy
407 z := x*y ... 29 bits to sqrt(x), with z*y<1
408 z := z + 0.5*z*(1-z*y) ... about 1 ulp to sqrt(x)
409
410 Remark 2. The constant 1.5-2^-30 is chosen to bias the error so that
411 (a) the term z*y in the final iteration is always less than 1;
412 (b) the error in the final result is biased upward so that
413 -1 ulp < sqrt(x) - z < 1.0625 ulp
414 instead of |sqrt(x)-z|<1.03125ulp.
415
416 (3) Final adjustment
417
418 By twiddling y's last bit it is possible to force y to be
419 correctly rounded according to the prevailing rounding mode
420 as follows. Let r and i be copies of the rounding mode and
421 inexact flag before entering the square root program. Also we
422 use the expression y+-ulp for the next representable floating
423 numbers (up and down) of y. Note that y+-ulp = either fixed
424 point y+-1, or multiply y by nextafter(1,+-inf) in chopped
425 mode.
426
427 R := RZ; ... set rounding mode to round-toward-zero
428 switch(r) {
429 case RN: ... round-to-nearest
430 if(x<= z*(z-ulp)...chopped) z = z - ulp; else
431 if(x<= z*(z+ulp)...chopped) z = z; else z = z+ulp;
432 break;
433 case RZ:case RM: ... round-to-zero or round-to--inf
434 R:=RP; ... reset rounding mod to round-to-+inf
435 if(x<z*z ... rounded up) z = z - ulp; else
436 if(x>=(z+ulp)*(z+ulp) ...rounded up) z = z+ulp;
437 break;
438 case RP: ... round-to-+inf
439 if(x>(z+ulp)*(z+ulp)...chopped) z = z+2*ulp; else
440 if(x>z*z ...chopped) z = z+ulp;
441 break;
442 }
443
444 Remark 3. The above comparisons can be done in fixed point. For
445 example, to compare x and w=z*z chopped, it suffices to compare
446 x1 and w1 (the trailing parts of x and w), regarding them as
447 two's complement integers.
448
449 ...Is z an exact square root?
450 To determine whether z is an exact square root of x, let z1 be the
451 trailing part of z, and also let x0 and x1 be the leading and
452 trailing parts of x.
453
454 If ((z1&0x03ffffff)!=0) ... not exact if trailing 26 bits of z!=0
455 I := 1; ... Raise Inexact flag: z is not exact
456 else {
457 j := 1 - [(x0>>20)&1] ... j = logb(x) mod 2
458 k := z1 >> 26; ... get z's 25-th and 26-th
459 fraction bits
460 I := i or (k&j) or ((k&(j+j+1))!=(x1&3));
461 }
462 R:= r ... restore rounded mode
463 return sqrt(x):=z.
464
465 If multiplication is cheaper then the foregoing red tape, the
466 Inexact flag can be evaluated by
467
468 I := i;
469 I := (z*z!=x) or I.
470
471 Note that z*z can overwrite I; this value must be sensed if it is
472 True.
473
474 Remark 4. If z*z = x exactly, then bit 25 to bit 0 of z1 must be
475 zero.
476
477 --------------------
478 z1: | f2 |
479 --------------------
480 bit 31 bit 0
481
482 Further more, bit 27 and 26 of z1, bit 0 and 1 of x1, and the odd
483 or even of logb(x) have the following relations:
484
485 -------------------------------------------------
486 bit 27,26 of z1 bit 1,0 of x1 logb(x)
487 -------------------------------------------------
488 00 00 odd and even
489 01 01 even
490 10 10 odd
491 10 00 even
492 11 01 even
493 -------------------------------------------------
494
495 (4) Special cases (see (4) of Section A).
496 */
497