/*
 * QEMU float support macros
 *
 * Derived from SoftFloat.
 */

/*============================================================================

This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
Arithmetic Package, Release 2b.

Written by John R. Hauser.  This work was made possible in part by the
International Computer Science Institute, located at Suite 600, 1947 Center
Street, Berkeley, California 94704.  Funding was partially provided by the
National Science Foundation under grant MIP-9311980.  The original version
of this code was written as part of a project to build a fixed-point vector
processor in collaboration with the University of California at Berkeley,
overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
arithmetic/SoftFloat.html'.

THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
INSTITUTE (possibly via similar legal notice) AGAINST ALL LOSSES, COSTS, OR
OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.

Derivative works are acceptable, even for commercial purposes, so long as
(1) the source code for the derivative work includes prominent notice that
the work is derivative, and (2) the source code includes prominent notice with
these four paragraphs for those parts of this code that are retained.

=============================================================================*/

/*----------------------------------------------------------------------------
| This macro tests for a minimum version of the GNU C compiler.
*----------------------------------------------------------------------------*/
#if defined(__GNUC__) && defined(__GNUC_MINOR__)
# define SOFTFLOAT_GNUC_PREREQ(maj, min) \
         ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min))
#else
# define SOFTFLOAT_GNUC_PREREQ(maj, min) 0
#endif


/*----------------------------------------------------------------------------
| Shifts `a' right by the number of bits given in `count'.  If any nonzero
| bits are shifted off, they are ``jammed'' into the least significant bit of
| the result by setting the least significant bit to 1.  The value of `count'
| can be arbitrarily large; in particular, if `count' is greater than 32, the
| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
| The result is stored in the location pointed to by `zPtr'.
*----------------------------------------------------------------------------*/

INLINE void shift32RightJamming( uint32_t a, int16 count, uint32_t *zPtr )
{
    uint32_t z;

    if ( count == 0 ) {
        z = a;
    }
    else if ( count < 32 ) {
        z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
    }
    else {
        z = ( a != 0 );
    }
    *zPtr = z;

}

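/*----------------------------------------------------------------------------
| Worked example of the jamming behaviour (illustrative; follows from the
| code above):  shifting 0x80000001 right by 4 discards a nonzero bit (the
| low 1), so the sticky OR sets bit 0 of the result:
|
|     uint32_t z;
|     shift32RightJamming( 0x80000001, 4, &z );    z is now 0x08000001
|     shift32RightJamming( 0x80000000, 4, &z );    z is now 0x08000000 (no jam)
*----------------------------------------------------------------------------*/
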
/*----------------------------------------------------------------------------
| Shifts `a' right by the number of bits given in `count'.  If any nonzero
| bits are shifted off, they are ``jammed'' into the least significant bit of
| the result by setting the least significant bit to 1.  The value of `count'
| can be arbitrarily large; in particular, if `count' is greater than 64, the
| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
| The result is stored in the location pointed to by `zPtr'.
*----------------------------------------------------------------------------*/

INLINE void shift64RightJamming( uint64_t a, int16 count, uint64_t *zPtr )
{
    uint64_t z;

    if ( count == 0 ) {
        z = a;
    }
    else if ( count < 64 ) {
        z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );
    }
    else {
        z = ( a != 0 );
    }
    *zPtr = z;

}

/*----------------------------------------------------------------------------
| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
| _plus_ the number of bits given in `count'.  The shifted result is at most
| 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'.  The
| bits shifted off form a second 64-bit result as follows:  The _last_ bit
| shifted off is the most-significant bit of the extra result, and the other
| 63 bits of the extra result are all zero if and only if _all_but_the_last_
| bits shifted off were all zero.  This extra result is stored in the location
| pointed to by `z1Ptr'.  The value of `count' can be arbitrarily large.
|     (This routine makes more sense if `a0' and `a1' are considered to form
| a fixed-point value with binary point between `a0' and `a1'.  This fixed-
| point value is shifted right by the number of bits given in `count', and
| the integer part of the result is returned at the location pointed to by
| `z0Ptr'.  The fractional part of the result may be slightly corrupted as
| described above, and is returned at the location pointed to by `z1Ptr'.)
*----------------------------------------------------------------------------*/

INLINE void
 shift64ExtraRightJamming(
     uint64_t a0, uint64_t a1, int16 count, uint64_t *z0Ptr, uint64_t *z1Ptr )
{
    uint64_t z0, z1;
    int8 negCount = ( - count ) & 63;

    if ( count == 0 ) {
        z1 = a1;
        z0 = a0;
    }
    else if ( count < 64 ) {
        z1 = ( a0<<negCount ) | ( a1 != 0 );
        z0 = a0>>count;
    }
    else {
        if ( count == 64 ) {
            z1 = a0 | ( a1 != 0 );
        }
        else {
            z1 = ( ( a0 | a1 ) != 0 );
        }
        z0 = 0;
    }
    *z1Ptr = z1;
    *z0Ptr = z0;

}

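/*----------------------------------------------------------------------------
| Worked example of the fixed-point view (illustrative; follows from the code
| above):  with `a0' = 0x123 and `a1' = 0, the value is 0x123.0; shifting it
| right by 4 gives integer part 0x12 and fraction 0x3/0x10:
|
|     uint64_t z0, z1;
|     shift64ExtraRightJamming( 0x123, 0, 4, &z0, &z1 );
|         z0 is now 0x12, z1 is now 0x3000000000000000
*----------------------------------------------------------------------------*/
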
/*----------------------------------------------------------------------------
| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
| number of bits given in `count'.  Any bits shifted off are lost.  The value
| of `count' can be arbitrarily large; in particular, if `count' is greater
| than 128, the result will be 0.  The result is broken into two 64-bit pieces
| which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
*----------------------------------------------------------------------------*/

INLINE void
 shift128Right(
     uint64_t a0, uint64_t a1, int16 count, uint64_t *z0Ptr, uint64_t *z1Ptr )
{
    uint64_t z0, z1;
    int8 negCount = ( - count ) & 63;

    if ( count == 0 ) {
        z1 = a1;
        z0 = a0;
    }
    else if ( count < 64 ) {
        z1 = ( a0<<negCount ) | ( a1>>count );
        z0 = a0>>count;
    }
    else {
        z1 = ( count < 128 ) ? ( a0>>( count & 63 ) ) : 0;
        z0 = 0;
    }
    *z1Ptr = z1;
    *z0Ptr = z0;

}

/*----------------------------------------------------------------------------
| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
| number of bits given in `count'.  If any nonzero bits are shifted off, they
| are ``jammed'' into the least significant bit of the result by setting the
| least significant bit to 1.  The value of `count' can be arbitrarily large;
| in particular, if `count' is greater than 128, the result will be either
| 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
| nonzero.  The result is broken into two 64-bit pieces which are stored at
| the locations pointed to by `z0Ptr' and `z1Ptr'.
*----------------------------------------------------------------------------*/

INLINE void
 shift128RightJamming(
     uint64_t a0, uint64_t a1, int16 count, uint64_t *z0Ptr, uint64_t *z1Ptr )
{
    uint64_t z0, z1;
    int8 negCount = ( - count ) & 63;

    if ( count == 0 ) {
        z1 = a1;
        z0 = a0;
    }
    else if ( count < 64 ) {
        z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
        z0 = a0>>count;
    }
    else {
        if ( count == 64 ) {
            z1 = a0 | ( a1 != 0 );
        }
        else if ( count < 128 ) {
            z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
        }
        else {
            z1 = ( ( a0 | a1 ) != 0 );
        }
        z0 = 0;
    }
    *z1Ptr = z1;
    *z0Ptr = z0;

}

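/*----------------------------------------------------------------------------
| Worked example (illustrative; follows from the code above):  shifting the
| 128-bit value 0x2:0x1 right by 64 discards the nonzero `a1', which is
| jammed into bit 0 of the 64-bit result:
|
|     uint64_t z0, z1;
|     shift128RightJamming( 0x2, 0x1, 64, &z0, &z1 );
|         z0 is now 0, z1 is now 0x3
*----------------------------------------------------------------------------*/
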
/*----------------------------------------------------------------------------
| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
| by 64 _plus_ the number of bits given in `count'.  The shifted result is
| at most 128 nonzero bits; these are broken into two 64-bit pieces which are
| stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
| off form a third 64-bit result as follows:  The _last_ bit shifted off is
| the most-significant bit of the extra result, and the other 63 bits of the
| extra result are all zero if and only if _all_but_the_last_ bits shifted off
| were all zero.  This extra result is stored in the location pointed to by
| `z2Ptr'.  The value of `count' can be arbitrarily large.
|     (This routine makes more sense if `a0', `a1', and `a2' are considered
| to form a fixed-point value with binary point between `a1' and `a2'.  This
| fixed-point value is shifted right by the number of bits given in `count',
| and the integer part of the result is returned at the locations pointed to
| by `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
| corrupted as described above, and is returned at the location pointed to by
| `z2Ptr'.)
*----------------------------------------------------------------------------*/

INLINE void
 shift128ExtraRightJamming(
     uint64_t a0,
     uint64_t a1,
     uint64_t a2,
     int16 count,
     uint64_t *z0Ptr,
     uint64_t *z1Ptr,
     uint64_t *z2Ptr
 )
{
    uint64_t z0, z1, z2;
    int8 negCount = ( - count ) & 63;

    if ( count == 0 ) {
        z2 = a2;
        z1 = a1;
        z0 = a0;
    }
    else {
        if ( count < 64 ) {
            z2 = a1<<negCount;
            z1 = ( a0<<negCount ) | ( a1>>count );
            z0 = a0>>count;
        }
        else {
            if ( count == 64 ) {
                z2 = a1;
                z1 = a0;
            }
            else {
                a2 |= a1;
                if ( count < 128 ) {
                    z2 = a0<<negCount;
                    z1 = a0>>( count & 63 );
                }
                else {
                    z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
                    z1 = 0;
                }
            }
            z0 = 0;
        }
        z2 |= ( a2 != 0 );
    }
    *z2Ptr = z2;
    *z1Ptr = z1;
    *z0Ptr = z0;

}

/*----------------------------------------------------------------------------
| Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
| number of bits given in `count'.  Any bits shifted off are lost.  The value
| of `count' must be less than 64.  The result is broken into two 64-bit
| pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
*----------------------------------------------------------------------------*/

INLINE void
 shortShift128Left(
     uint64_t a0, uint64_t a1, int16 count, uint64_t *z0Ptr, uint64_t *z1Ptr )
{

    *z1Ptr = a1<<count;
    *z0Ptr =
        ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );

}

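/*----------------------------------------------------------------------------
| Worked example (illustrative; follows from the code above):  the top four
| bits of `a1' move into the bottom of `a0':
|
|     uint64_t z0, z1;
|     shortShift128Left( 0x1, 0x8000000000000000, 4, &z0, &z1 );
|         z0 is now 0x18, z1 is now 0
*----------------------------------------------------------------------------*/
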
/*----------------------------------------------------------------------------
| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
| by the number of bits given in `count'.  Any bits shifted off are lost.
| The value of `count' must be less than 64.  The result is broken into three
| 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
| `z1Ptr', and `z2Ptr'.
*----------------------------------------------------------------------------*/

INLINE void
 shortShift192Left(
     uint64_t a0,
     uint64_t a1,
     uint64_t a2,
     int16 count,
     uint64_t *z0Ptr,
     uint64_t *z1Ptr,
     uint64_t *z2Ptr
 )
{
    uint64_t z0, z1, z2;
    int8 negCount;

    z2 = a2<<count;
    z1 = a1<<count;
    z0 = a0<<count;
    if ( 0 < count ) {
        negCount = ( ( - count ) & 63 );
        z1 |= a2>>negCount;
        z0 |= a1>>negCount;
    }
    *z2Ptr = z2;
    *z1Ptr = z1;
    *z0Ptr = z0;

}

/*----------------------------------------------------------------------------
| Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
| value formed by concatenating `b0' and `b1'.  Addition is modulo 2^128, so
| any carry out is lost.  The result is broken into two 64-bit pieces which
| are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
*----------------------------------------------------------------------------*/

INLINE void
 add128(
     uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr )
{
    uint64_t z1;

    z1 = a1 + b1;
    *z1Ptr = z1;
    *z0Ptr = a0 + b0 + ( z1 < a1 );

}

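/*----------------------------------------------------------------------------
| Worked example of the carry propagation (illustrative; follows from the
| code above):  the low halves overflow, so one carry is added into the high
| half:
|
|     uint64_t z0, z1;
|     add128( 0x0, LIT64( 0xFFFFFFFFFFFFFFFF ), 0x0, 0x1, &z0, &z1 );
|         z0 is now 0x1, z1 is now 0x0   (i.e. the 128-bit value 2^64)
*----------------------------------------------------------------------------*/
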
/*----------------------------------------------------------------------------
| Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
| 192-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is
| modulo 2^192, so any carry out is lost.  The result is broken into three
| 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
| `z1Ptr', and `z2Ptr'.
*----------------------------------------------------------------------------*/

INLINE void
 add192(
     uint64_t a0,
     uint64_t a1,
     uint64_t a2,
     uint64_t b0,
     uint64_t b1,
     uint64_t b2,
     uint64_t *z0Ptr,
     uint64_t *z1Ptr,
     uint64_t *z2Ptr
 )
{
    uint64_t z0, z1, z2;
    int8 carry0, carry1;

    z2 = a2 + b2;
    carry1 = ( z2 < a2 );
    z1 = a1 + b1;
    carry0 = ( z1 < a1 );
    z0 = a0 + b0;
    z1 += carry1;
    z0 += ( z1 < carry1 );
    z0 += carry0;
    *z2Ptr = z2;
    *z1Ptr = z1;
    *z0Ptr = z0;

}

/*----------------------------------------------------------------------------
| Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
| 128-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo
| 2^128, so any borrow out (carry out) is lost.  The result is broken into two
| 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
| `z1Ptr'.
*----------------------------------------------------------------------------*/

INLINE void
 sub128(
     uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr )
{

    *z1Ptr = a1 - b1;
    *z0Ptr = a0 - b0 - ( a1 < b1 );

}

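/*----------------------------------------------------------------------------
| Worked example of the borrow (illustrative; follows from the code above):
| subtracting 1 from the 128-bit value 2^64 borrows from the high half:
|
|     uint64_t z0, z1;
|     sub128( 0x1, 0x0, 0x0, 0x1, &z0, &z1 );
|         z0 is now 0x0, z1 is now LIT64( 0xFFFFFFFFFFFFFFFF )
*----------------------------------------------------------------------------*/
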
/*----------------------------------------------------------------------------
| Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
| from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
| Subtraction is modulo 2^192, so any borrow out (carry out) is lost.  The
| result is broken into three 64-bit pieces which are stored at the locations
| pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
*----------------------------------------------------------------------------*/

INLINE void
 sub192(
     uint64_t a0,
     uint64_t a1,
     uint64_t a2,
     uint64_t b0,
     uint64_t b1,
     uint64_t b2,
     uint64_t *z0Ptr,
     uint64_t *z1Ptr,
     uint64_t *z2Ptr
 )
{
    uint64_t z0, z1, z2;
    int8 borrow0, borrow1;

    z2 = a2 - b2;
    borrow1 = ( a2 < b2 );
    z1 = a1 - b1;
    borrow0 = ( a1 < b1 );
    z0 = a0 - b0;
    z0 -= ( z1 < borrow1 );
    z1 -= borrow1;
    z0 -= borrow0;
    *z2Ptr = z2;
    *z1Ptr = z1;
    *z0Ptr = z0;

}

/*----------------------------------------------------------------------------
| Multiplies `a' by `b' to obtain a 128-bit product.  The product is broken
| into two 64-bit pieces which are stored at the locations pointed to by
| `z0Ptr' and `z1Ptr'.
*----------------------------------------------------------------------------*/

INLINE void mul64To128( uint64_t a, uint64_t b, uint64_t *z0Ptr, uint64_t *z1Ptr )
{
    uint32_t aHigh, aLow, bHigh, bLow;
    uint64_t z0, zMiddleA, zMiddleB, z1;

    aLow = a;
    aHigh = a>>32;
    bLow = b;
    bHigh = b>>32;
    z1 = ( (uint64_t) aLow ) * bLow;
    zMiddleA = ( (uint64_t) aLow ) * bHigh;
    zMiddleB = ( (uint64_t) aHigh ) * bLow;
    z0 = ( (uint64_t) aHigh ) * bHigh;
    zMiddleA += zMiddleB;
    z0 += ( ( (uint64_t) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
    zMiddleA <<= 32;
    z1 += zMiddleA;
    z0 += ( z1 < zMiddleA );
    *z1Ptr = z1;
    *z0Ptr = z0;

}

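/*----------------------------------------------------------------------------
| Worked example (illustrative; follows from the code above):  squaring the
| largest 64-bit value gives (2^64 - 1)^2 = 2^128 - 2^65 + 1:
|
|     uint64_t hi, lo;
|     mul64To128( LIT64( 0xFFFFFFFFFFFFFFFF ),
|                 LIT64( 0xFFFFFFFFFFFFFFFF ), &hi, &lo );
|         hi is now LIT64( 0xFFFFFFFFFFFFFFFE ), lo is now 0x1
*----------------------------------------------------------------------------*/
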
/*----------------------------------------------------------------------------
| Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
| `b' to obtain a 192-bit product.  The product is broken into three 64-bit
| pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
| `z2Ptr'.
*----------------------------------------------------------------------------*/

INLINE void
 mul128By64To192(
     uint64_t a0,
     uint64_t a1,
     uint64_t b,
     uint64_t *z0Ptr,
     uint64_t *z1Ptr,
     uint64_t *z2Ptr
 )
{
    uint64_t z0, z1, z2, more1;

    mul64To128( a1, b, &z1, &z2 );
    mul64To128( a0, b, &z0, &more1 );
    add128( z0, more1, 0, z1, &z0, &z1 );
    *z2Ptr = z2;
    *z1Ptr = z1;
    *z0Ptr = z0;

}

/*----------------------------------------------------------------------------
| Multiplies the 128-bit value formed by concatenating `a0' and `a1' by the
| 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
| product.  The product is broken into four 64-bit pieces which are stored at
| the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
*----------------------------------------------------------------------------*/

INLINE void
 mul128To256(
     uint64_t a0,
     uint64_t a1,
     uint64_t b0,
     uint64_t b1,
     uint64_t *z0Ptr,
     uint64_t *z1Ptr,
     uint64_t *z2Ptr,
     uint64_t *z3Ptr
 )
{
    uint64_t z0, z1, z2, z3;
    uint64_t more1, more2;

    mul64To128( a1, b1, &z2, &z3 );
    mul64To128( a1, b0, &z1, &more2 );
    add128( z1, more2, 0, z2, &z1, &z2 );
    mul64To128( a0, b0, &z0, &more1 );
    add128( z0, more1, 0, z1, &z0, &z1 );
    mul64To128( a0, b1, &more1, &more2 );
    add128( more1, more2, 0, z2, &more1, &z2 );
    add128( z0, z1, 0, more1, &z0, &z1 );
    *z3Ptr = z3;
    *z2Ptr = z2;
    *z1Ptr = z1;
    *z0Ptr = z0;

}

/*----------------------------------------------------------------------------
| Returns an approximation to the 64-bit integer quotient obtained by dividing
| `b' into the 128-bit value formed by concatenating `a0' and `a1'.  The
| divisor `b' must be at least 2^63.  If q is the exact quotient truncated
| toward zero, the approximation returned lies between q and q + 2 inclusive.
| If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
| unsigned integer is returned.
*----------------------------------------------------------------------------*/

static uint64_t estimateDiv128To64( uint64_t a0, uint64_t a1, uint64_t b )
{
    uint64_t b0, b1;
    uint64_t rem0, rem1, term0, term1;
    uint64_t z;

    if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
    b0 = b>>32;
    z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32;
    mul64To128( b, z, &term0, &term1 );
    sub128( a0, a1, term0, term1, &rem0, &rem1 );
    while ( ( (int64_t) rem0 ) < 0 ) {
        z -= LIT64( 0x100000000 );
        b1 = b<<32;
        add128( rem0, rem1, b0, b1, &rem0, &rem1 );
    }
    rem0 = ( rem0<<32 ) | ( rem1>>32 );
    z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0;
    return z;

}

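/*----------------------------------------------------------------------------
| Worked example (illustrative; follows from the code above):  dividing the
| 128-bit value 2^64 (`a0' = 1, `a1' = 0) by the minimum legal divisor 2^63
| returns exactly 2, which here happens to equal the exact quotient:
|
|     uint64_t q = estimateDiv128To64( 0x1, 0x0, LIT64( 0x8000000000000000 ) );
|         q is now 2
*----------------------------------------------------------------------------*/
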
/*----------------------------------------------------------------------------
| Returns an approximation to the square root of the 32-bit significand given
| by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
| `aExp' (the least significant bit) is 1, the integer returned approximates
| 2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
| is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
| case, the approximation returned lies strictly within +/-2 of the exact
| value.
*----------------------------------------------------------------------------*/

static uint32_t estimateSqrt32( int16 aExp, uint32_t a )
{
    static const uint16_t sqrtOddAdjustments[] = {
        0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
        0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
    };
    static const uint16_t sqrtEvenAdjustments[] = {
        0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
        0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
    };
    int8 index;
    uint32_t z;

    index = ( a>>27 ) & 15;
    if ( aExp & 1 ) {
        z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ (int)index ];
        z = ( ( a / z )<<14 ) + ( z<<15 );
        a >>= 1;
    }
    else {
        z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ (int)index ];
        z = a / z + z;
        z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
        if ( z <= a ) return (uint32_t) ( ( (int32_t) a )>>1 );
    }
    return ( (uint32_t) ( ( ( (uint64_t) a )<<31 ) / z ) ) + ( z>>1 );

}

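/*----------------------------------------------------------------------------
| Worked example (illustrative; follows from the specification above):  with
| an even `aExp' and `a' = 0x80000000 (i.e. 2^31), the routine approximates
| 2^31*sqrt(2^31/2^30) = 2^31*sqrt(2), which is approximately 0xB504F334, so
| the return value lies within 2 of that figure:
|
|     uint32_t r = estimateSqrt32( 0, 0x80000000 );
*----------------------------------------------------------------------------*/
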
/*----------------------------------------------------------------------------
| Returns the number of leading 0 bits before the most-significant 1 bit of
| `a'.  If `a' is zero, 32 is returned.
*----------------------------------------------------------------------------*/

static int8 countLeadingZeros32( uint32_t a )
{
#if SOFTFLOAT_GNUC_PREREQ(3, 4)
    if (a) {
        return __builtin_clz(a);
    } else {
        return 32;
    }
#else
    static const int8 countLeadingZerosHigh[] = {
        8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    };
    int8 shiftCount;

    shiftCount = 0;
    if ( a < 0x10000 ) {
        shiftCount += 16;
        a <<= 16;
    }
    if ( a < 0x1000000 ) {
        shiftCount += 8;
        a <<= 8;
    }
    shiftCount += countLeadingZerosHigh[ a>>24 ];
    return shiftCount;
#endif
}

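/*----------------------------------------------------------------------------
| Worked example (illustrative; follows from the code above):
|
|     countLeadingZeros32( 0x00010000 )   returns 15
|     countLeadingZeros32( 0x80000000 )   returns 0
|     countLeadingZeros32( 0 )            returns 32
*----------------------------------------------------------------------------*/
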
/*----------------------------------------------------------------------------
| Returns the number of leading 0 bits before the most-significant 1 bit of
| `a'.  If `a' is zero, 64 is returned.
*----------------------------------------------------------------------------*/

static int8 countLeadingZeros64( uint64_t a )
{
#if SOFTFLOAT_GNUC_PREREQ(3, 4)
    if (a) {
        return __builtin_clzll(a);
    } else {
        return 64;
    }
#else
    int8 shiftCount;

    shiftCount = 0;
    if ( a < ( (uint64_t) 1 )<<32 ) {
        shiftCount += 32;
    }
    else {
        a >>= 32;
    }
    shiftCount += countLeadingZeros32( a );
    return shiftCount;
#endif
}

/*----------------------------------------------------------------------------
| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
| is equal to the 128-bit value formed by concatenating `b0' and `b1'.
| Otherwise, returns 0.
*----------------------------------------------------------------------------*/

INLINE flag eq128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
{

    return ( a0 == b0 ) && ( a1 == b1 );

}

/*----------------------------------------------------------------------------
| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
| than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
| Otherwise, returns 0.
*----------------------------------------------------------------------------*/

INLINE flag le128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
{

    return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );

}

/*----------------------------------------------------------------------------
| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
| than the 128-bit value formed by concatenating `b0' and `b1'.  Otherwise,
| returns 0.
*----------------------------------------------------------------------------*/

INLINE flag lt128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
{

    return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );

}

/*----------------------------------------------------------------------------
| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
| not equal to the 128-bit value formed by concatenating `b0' and `b1'.
| Otherwise, returns 0.
*----------------------------------------------------------------------------*/

INLINE flag ne128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
{

    return ( a0 != b0 ) || ( a1 != b1 );

}