• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* @(#)e_sqrt.c 5.1 93/09/24 */
2 /*
3  * ====================================================
4  * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
5  *
6  * Developed at SunPro, a Sun Microsystems, Inc. business.
7  * Permission to use, copy, modify, and distribute this
8  * software is freely granted, provided that this notice
9  * is preserved.
10  * ====================================================
11  */
12 
13 #if defined(LIBM_SCCS) && !defined(lint)
14 static char rcsid[] = "$NetBSD: e_sqrt.c,v 1.8 1995/05/10 20:46:17 jtc Exp $";
15 #endif
16 
17 /* __ieee754_sqrt(x)
18  * Return correctly rounded sqrt.
19  *           ------------------------------------------
20  *	     |  Use the hardware sqrt if you have one |
21  *           ------------------------------------------
22  * Method:
23  *   Bit by bit method using integer arithmetic. (Slow, but portable)
24  *   1. Normalization
25  *	Scale x to y in [1,4) with even powers of 2:
26  *	find an integer k such that  1 <= (y=x*2^(2k)) < 4, then
27  *		sqrt(x) = 2^k * sqrt(y)
28  *   2. Bit by bit computation
29  *	Let q  = sqrt(y) truncated to i bit after binary point (q = 1),
30  *	     i							 0
31  *                                     i+1         2
32  *	    s  = 2*q , and	y  =  2   * ( y - q  ).		(1)
33  *	     i      i            i                 i
34  *
35  *	To compute q    from q , one checks whether
36  *		    i+1       i
37  *
38  *			      -(i+1) 2
39  *			(q + 2      ) <= y.			(2)
40  *     			  i
41  *							      -(i+1)
42  *	If (2) is false, then q   = q ; otherwise q   = q  + 2      .
43  *		 	       i+1   i             i+1   i
44  *
45  *	With some algebric manipulation, it is not difficult to see
46  *	that (2) is equivalent to
47  *                             -(i+1)
48  *			s  +  2       <= y			(3)
49  *			 i                i
50  *
51  *	The advantage of (3) is that s  and y  can be computed by
52  *				      i      i
53  *	the following recurrence formula:
54  *	    if (3) is false
55  *
56  *	    s     =  s  ,	y    = y   ;			(4)
57  *	     i+1      i		 i+1    i
58  *
59  *	    otherwise,
60  *                         -i                     -(i+1)
61  *	    s	  =  s  + 2  ,  y    = y  -  s  - 2  		(5)
62  *           i+1      i          i+1    i     i
63  *
64  *	One may easily use induction to prove (4) and (5).
65  *	Note. Since the left hand side of (3) contain only i+2 bits,
66  *	      it does not necessary to do a full (53-bit) comparison
67  *	      in (3).
68  *   3. Final rounding
69  *	After generating the 53 bits result, we compute one more bit.
70  *	Together with the remainder, we can decide whether the
71  *	result is exact, bigger than 1/2ulp, or less than 1/2ulp
72  *	(it will never equal to 1/2ulp).
73  *	The rounding mode can be detected by checking whether
74  *	huge + tiny is equal to huge, and whether huge - tiny is
75  *	equal to huge for some floating point number "huge" and "tiny".
76  *
77  * Special cases:
78  *	sqrt(+-0) = +-0 	... exact
79  *	sqrt(inf) = inf
80  *	sqrt(-ve) = NaN		... with invalid signal
81  *	sqrt(NaN) = NaN		... with invalid signal for signaling NaN
82  *
83  * Other methods : see the appended file at the end of the program below.
84  *---------------
85  */
86 
87 /*#include "math.h"*/
88 #include "math_private.h"
89 
90 #ifdef __STDC__
SDL_NAME(copysign)91 	double SDL_NAME(copysign)(double x, double y)
92 #else
93 	double SDL_NAME(copysign)(x,y)
94 	double x,y;
95 #endif
96 {
97 	u_int32_t hx,hy;
98 	GET_HIGH_WORD(hx,x);
99 	GET_HIGH_WORD(hy,y);
100 	SET_HIGH_WORD(x,(hx&0x7fffffff)|(hy&0x80000000));
101         return x;
102 }
103 
104 #ifdef __STDC__
SDL_NAME(scalbn)105 	double SDL_NAME(scalbn) (double x, int n)
106 #else
107 	double SDL_NAME(scalbn) (x,n)
108 	double x; int n;
109 #endif
110 {
111 	int32_t k,hx,lx;
112 	EXTRACT_WORDS(hx,lx,x);
113         k = (hx&0x7ff00000)>>20;		/* extract exponent */
114         if (k==0) {				/* 0 or subnormal x */
115             if ((lx|(hx&0x7fffffff))==0) return x; /* +-0 */
116 	    x *= two54;
117 	    GET_HIGH_WORD(hx,x);
118 	    k = ((hx&0x7ff00000)>>20) - 54;
119             if (n< -50000) return tiny*x; 	/*underflow*/
120 	    }
121         if (k==0x7ff) return x+x;		/* NaN or Inf */
122         k = k+n;
123         if (k >  0x7fe) return huge*SDL_NAME(copysign)(huge,x); /* overflow  */
124         if (k > 0) 				/* normal result */
125 	    {SET_HIGH_WORD(x,(hx&0x800fffff)|(k<<20)); return x;}
126         if (k <= -54) {
127             if (n > 50000) 	/* in case integer overflow in n+k */
128 		return huge*SDL_NAME(copysign)(huge,x);	/*overflow*/
129 	    else return tiny*SDL_NAME(copysign)(tiny,x); 	/*underflow*/
130 	}
131         k += 54;				/* subnormal result */
132 	SET_HIGH_WORD(x,(hx&0x800fffff)|(k<<20));
133         return x*twom54;
134 }
135 
136 #ifdef __STDC__
__ieee754_sqrt(double x)137 	double __ieee754_sqrt(double x)
138 #else
139 	double __ieee754_sqrt(x)
140 	double x;
141 #endif
142 {
143 	double z;
144 	int32_t sign = (int)0x80000000;
145 	int32_t ix0,s0,q,m,t,i;
146 	u_int32_t r,t1,s1,ix1,q1;
147 
148 	EXTRACT_WORDS(ix0,ix1,x);
149 
150     /* take care of Inf and NaN */
151 	if((ix0&0x7ff00000)==0x7ff00000) {
152 	    return x*x+x;		/* sqrt(NaN)=NaN, sqrt(+inf)=+inf
153 					   sqrt(-inf)=sNaN */
154 	}
155     /* take care of zero */
156 	if(ix0<=0) {
157 	    if(((ix0&(~sign))|ix1)==0) return x;/* sqrt(+-0) = +-0 */
158 	    else if(ix0<0)
159 		return (x-x)/(x-x);		/* sqrt(-ve) = sNaN */
160 	}
161     /* normalize x */
162 	m = (ix0>>20);
163 	if(m==0) {				/* subnormal x */
164 	    while(ix0==0) {
165 		m -= 21;
166 		ix0 |= (ix1>>11); ix1 <<= 21;
167 	    }
168 	    for(i=0;(ix0&0x00100000)==0;i++) ix0<<=1;
169 	    m -= i-1;
170 	    ix0 |= (ix1>>(32-i));
171 	    ix1 <<= i;
172 	}
173 	m -= 1023;	/* unbias exponent */
174 	ix0 = (ix0&0x000fffff)|0x00100000;
175 	if(m&1){	/* odd m, double x to make it even */
176 	    ix0 += ix0 + ((ix1&sign)>>31);
177 	    ix1 += ix1;
178 	}
179 	m >>= 1;	/* m = [m/2] */
180 
181     /* generate sqrt(x) bit by bit */
182 	ix0 += ix0 + ((ix1&sign)>>31);
183 	ix1 += ix1;
184 	q = q1 = s0 = s1 = 0;	/* [q,q1] = sqrt(x) */
185 	r = 0x00200000;		/* r = moving bit from right to left */
186 
187 	while(r!=0) {
188 	    t = s0+r;
189 	    if(t<=ix0) {
190 		s0   = t+r;
191 		ix0 -= t;
192 		q   += r;
193 	    }
194 	    ix0 += ix0 + ((ix1&sign)>>31);
195 	    ix1 += ix1;
196 	    r>>=1;
197 	}
198 
199 	r = sign;
200 	while(r!=0) {
201 	    t1 = s1+r;
202 	    t  = s0;
203 	    if((t<ix0)||((t==ix0)&&(t1<=ix1))) {
204 		s1  = t1+r;
205 		if(((int32_t)(t1&sign)==sign)&&(s1&sign)==0) s0 += 1;
206 		ix0 -= t;
207 		if (ix1 < t1) ix0 -= 1;
208 		ix1 -= t1;
209 		q1  += r;
210 	    }
211 	    ix0 += ix0 + ((ix1&sign)>>31);
212 	    ix1 += ix1;
213 	    r>>=1;
214 	}
215 
216     /* use floating add to find out rounding direction */
217 	if((ix0|ix1)!=0) {
218 	    z = one-tiny; /* trigger inexact flag */
219 	    if (z>=one) {
220 	        z = one+tiny;
221 	        if (q1==(u_int32_t)0xffffffff) { q1=0; q += 1;}
222 		else if (z>one) {
223 		    if (q1==(u_int32_t)0xfffffffe) q+=1;
224 		    q1+=2;
225 		} else
226 	            q1 += (q1&1);
227 	    }
228 	}
229 	ix0 = (q>>1)+0x3fe00000;
230 	ix1 =  q1>>1;
231 	if ((q&1)==1) ix1 |= sign;
232 	ix0 += (m <<20);
233 	INSERT_WORDS(z,ix0,ix1);
234 	return z;
235 }
236 
237 /*
238 Other methods  (use floating-point arithmetic)
239 -------------
240 (This is a copy of a drafted paper by Prof W. Kahan
241 and K.C. Ng, written in May, 1986)
242 
243 	Two algorithms are given here to implement sqrt(x)
244 	(IEEE double precision arithmetic) in software.
245 	Both supply sqrt(x) correctly rounded. The first algorithm (in
246 	Section A) uses newton iterations and involves four divisions.
247 	The second one uses reciproot iterations to avoid division, but
248 	requires more multiplications. Both algorithms need the ability
249 	to chop results of arithmetic operations instead of round them,
250 	and the INEXACT flag to indicate when an arithmetic operation
251 	is executed exactly with no roundoff error, all part of the
252 	standard (IEEE 754-1985). The ability to perform shift, add,
253 	subtract and logical AND operations upon 32-bit words is needed
254 	too, though not part of the standard.
255 
256 A.  sqrt(x) by Newton Iteration
257 
258    (1)	Initial approximation
259 
260 	Let x0 and x1 be the leading and the trailing 32-bit words of
261 	a floating point number x (in IEEE double format) respectively
262 
263 	    1    11		     52				  ...widths
264 	   ------------------------------------------------------
265 	x: |s|	  e     |	      f				|
266 	   ------------------------------------------------------
267 	      msb    lsb  msb				      lsb ...order
268 
269 
270 	     ------------------------  	     ------------------------
271 	x0:  |s|   e    |    f1     |	 x1: |          f2           |
272 	     ------------------------  	     ------------------------
273 
274 	By performing shifts and subtracts on x0 and x1 (both regarded
275 	as integers), we obtain an 8-bit approximation of sqrt(x) as
276 	follows.
277 
278 		k  := (x0>>1) + 0x1ff80000;
279 		y0 := k - T1[31&(k>>15)].	... y ~ sqrt(x) to 8 bits
280 	Here k is a 32-bit integer and T1[] is an integer array containing
281 	correction terms. Now magically the floating value of y (y's
282 	leading 32-bit word is y0, the value of its trailing word is 0)
283 	approximates sqrt(x) to almost 8-bit.
284 
285 	Value of T1:
286 	static int T1[32]= {
287 	0,	1024,	3062,	5746,	9193,	13348,	18162,	23592,
288 	29598,	36145,	43202,	50740,	58733,	67158,	75992,	85215,
289 	83599,	71378,	60428,	50647,	41945,	34246,	27478,	21581,
290 	16499,	12183,	8588,	5674,	3403,	1742,	661,	130,};
291 
292     (2)	Iterative refinement
293 
294 	Apply Heron's rule three times to y, we have y approximates
295 	sqrt(x) to within 1 ulp (Unit in the Last Place):
296 
297 		y := (y+x/y)/2		... almost 17 sig. bits
298 		y := (y+x/y)/2		... almost 35 sig. bits
299 		y := y-(y-x/y)/2	... within 1 ulp
300 
301 
302 	Remark 1.
303 	    Another way to improve y to within 1 ulp is:
304 
305 		y := (y+x/y)		... almost 17 sig. bits to 2*sqrt(x)
306 		y := y - 0x00100006	... almost 18 sig. bits to sqrt(x)
307 
308 				2
309 			    (x-y )*y
310 		y := y + 2* ----------	...within 1 ulp
311 			       2
312 			     3y  + x
313 
314 
315 	This formula has one division fewer than the one above; however,
316 	it requires more multiplications and additions. Also x must be
317 	scaled in advance to avoid spurious overflow in evaluating the
318 	expression 3y*y+x. Hence it is not recommended uless division
319 	is slow. If division is very slow, then one should use the
320 	reciproot algorithm given in section B.
321 
322     (3) Final adjustment
323 
324 	By twiddling y's last bit it is possible to force y to be
325 	correctly rounded according to the prevailing rounding mode
326 	as follows. Let r and i be copies of the rounding mode and
327 	inexact flag before entering the square root program. Also we
328 	use the expression y+-ulp for the next representable floating
329 	numbers (up and down) of y. Note that y+-ulp = either fixed
330 	point y+-1, or multiply y by nextafter(1,+-inf) in chopped
331 	mode.
332 
333 		I := FALSE;	... reset INEXACT flag I
334 		R := RZ;	... set rounding mode to round-toward-zero
335 		z := x/y;	... chopped quotient, possibly inexact
336 		If(not I) then {	... if the quotient is exact
337 		    if(z=y) {
338 		        I := i;	 ... restore inexact flag
339 		        R := r;  ... restore rounded mode
340 		        return sqrt(x):=y.
341 		    } else {
342 			z := z - ulp;	... special rounding
343 		    }
344 		}
345 		i := TRUE;		... sqrt(x) is inexact
346 		If (r=RN) then z=z+ulp	... rounded-to-nearest
347 		If (r=RP) then {	... round-toward-+inf
348 		    y = y+ulp; z=z+ulp;
349 		}
350 		y := y+z;		... chopped sum
351 		y0:=y0-0x00100000;	... y := y/2 is correctly rounded.
352 	        I := i;	 		... restore inexact flag
353 	        R := r;  		... restore rounded mode
354 	        return sqrt(x):=y.
355 
356     (4)	Special cases
357 
358 	Square root of +inf, +-0, or NaN is itself;
359 	Square root of a negative number is NaN with invalid signal.
360 
361 
362 B.  sqrt(x) by Reciproot Iteration
363 
364    (1)	Initial approximation
365 
366 	Let x0 and x1 be the leading and the trailing 32-bit words of
367 	a floating point number x (in IEEE double format) respectively
368 	(see section A). By performing shifs and subtracts on x0 and y0,
369 	we obtain a 7.8-bit approximation of 1/sqrt(x) as follows.
370 
371 	    k := 0x5fe80000 - (x0>>1);
372 	    y0:= k - T2[63&(k>>14)].	... y ~ 1/sqrt(x) to 7.8 bits
373 
374 	Here k is a 32-bit integer and T2[] is an integer array
375 	containing correction terms. Now magically the floating
376 	value of y (y's leading 32-bit word is y0, the value of
377 	its trailing word y1 is set to zero) approximates 1/sqrt(x)
378 	to almost 7.8-bit.
379 
380 	Value of T2:
381 	static int T2[64]= {
382 	0x1500,	0x2ef8,	0x4d67,	0x6b02,	0x87be,	0xa395,	0xbe7a,	0xd866,
383 	0xf14a,	0x1091b,0x11fcd,0x13552,0x14999,0x15c98,0x16e34,0x17e5f,
384 	0x18d03,0x19a01,0x1a545,0x1ae8a,0x1b5c4,0x1bb01,0x1bfde,0x1c28d,
385 	0x1c2de,0x1c0db,0x1ba73,0x1b11c,0x1a4b5,0x1953d,0x18266,0x16be0,
386 	0x1683e,0x179d8,0x18a4d,0x19992,0x1a789,0x1b445,0x1bf61,0x1c989,
387 	0x1d16d,0x1d77b,0x1dddf,0x1e2ad,0x1e5bf,0x1e6e8,0x1e654,0x1e3cd,
388 	0x1df2a,0x1d635,0x1cb16,0x1be2c,0x1ae4e,0x19bde,0x1868e,0x16e2e,
389 	0x1527f,0x1334a,0x11051,0xe951,	0xbe01,	0x8e0d,	0x5924,	0x1edd,};
390 
391     (2)	Iterative refinement
392 
393 	Apply Reciproot iteration three times to y and multiply the
394 	result by x to get an approximation z that matches sqrt(x)
395 	to about 1 ulp. To be exact, we will have
396 		-1ulp < sqrt(x)-z<1.0625ulp.
397 
398 	... set rounding mode to Round-to-nearest
399 	   y := y*(1.5-0.5*x*y*y)	... almost 15 sig. bits to 1/sqrt(x)
400 	   y := y*((1.5-2^-30)+0.5*x*y*y)... about 29 sig. bits to 1/sqrt(x)
401 	... special arrangement for better accuracy
402 	   z := x*y			... 29 bits to sqrt(x), with z*y<1
403 	   z := z + 0.5*z*(1-z*y)	... about 1 ulp to sqrt(x)
404 
405 	Remark 2. The constant 1.5-2^-30 is chosen to bias the error so that
406 	(a) the term z*y in the final iteration is always less than 1;
407 	(b) the error in the final result is biased upward so that
408 		-1 ulp < sqrt(x) - z < 1.0625 ulp
409 	    instead of |sqrt(x)-z|<1.03125ulp.
410 
411     (3)	Final adjustment
412 
413 	By twiddling y's last bit it is possible to force y to be
414 	correctly rounded according to the prevailing rounding mode
415 	as follows. Let r and i be copies of the rounding mode and
416 	inexact flag before entering the square root program. Also we
417 	use the expression y+-ulp for the next representable floating
418 	numbers (up and down) of y. Note that y+-ulp = either fixed
419 	point y+-1, or multiply y by nextafter(1,+-inf) in chopped
420 	mode.
421 
422 	R := RZ;		... set rounding mode to round-toward-zero
423 	switch(r) {
424 	    case RN:		... round-to-nearest
425 	       if(x<= z*(z-ulp)...chopped) z = z - ulp; else
426 	       if(x<= z*(z+ulp)...chopped) z = z; else z = z+ulp;
427 	       break;
428 	    case RZ:case RM:	... round-to-zero or round-to--inf
429 	       R:=RP;		... reset rounding mod to round-to-+inf
430 	       if(x<z*z ... rounded up) z = z - ulp; else
431 	       if(x>=(z+ulp)*(z+ulp) ...rounded up) z = z+ulp;
432 	       break;
433 	    case RP:		... round-to-+inf
434 	       if(x>(z+ulp)*(z+ulp)...chopped) z = z+2*ulp; else
435 	       if(x>z*z ...chopped) z = z+ulp;
436 	       break;
437 	}
438 
439 	Remark 3. The above comparisons can be done in fixed point. For
440 	example, to compare x and w=z*z chopped, it suffices to compare
441 	x1 and w1 (the trailing parts of x and w), regarding them as
442 	two's complement integers.
443 
444 	...Is z an exact square root?
445 	To determine whether z is an exact square root of x, let z1 be the
446 	trailing part of z, and also let x0 and x1 be the leading and
447 	trailing parts of x.
448 
449 	If ((z1&0x03ffffff)!=0)	... not exact if trailing 26 bits of z!=0
450 	    I := 1;		... Raise Inexact flag: z is not exact
451 	else {
452 	    j := 1 - [(x0>>20)&1]	... j = logb(x) mod 2
453 	    k := z1 >> 26;		... get z's 25-th and 26-th
454 					    fraction bits
455 	    I := i or (k&j) or ((k&(j+j+1))!=(x1&3));
456 	}
457 	R:= r		... restore rounded mode
458 	return sqrt(x):=z.
459 
460 	If multiplication is cheaper then the foregoing red tape, the
461 	Inexact flag can be evaluated by
462 
463 	    I := i;
464 	    I := (z*z!=x) or I.
465 
466 	Note that z*z can overwrite I; this value must be sensed if it is
467 	True.
468 
469 	Remark 4. If z*z = x exactly, then bit 25 to bit 0 of z1 must be
470 	zero.
471 
472 		    --------------------
473 		z1: |        f2        |
474 		    --------------------
475 		bit 31		   bit 0
476 
477 	Further more, bit 27 and 26 of z1, bit 0 and 1 of x1, and the odd
478 	or even of logb(x) have the following relations:
479 
480 	-------------------------------------------------
481 	bit 27,26 of z1		bit 1,0 of x1	logb(x)
482 	-------------------------------------------------
483 	00			00		odd and even
484 	01			01		even
485 	10			10		odd
486 	10			00		even
487 	11			01		even
488 	-------------------------------------------------
489 
490     (4)	Special cases (see (4) of Section A).
491 
492  */
493 
494