• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===-- Utilities to convert floating point values to string ----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_LIBC_SRC___SUPPORT_FLOAT_TO_STRING_H
10 #define LLVM_LIBC_SRC___SUPPORT_FLOAT_TO_STRING_H
11 
12 #include <stdint.h>
13 
14 #include "src/__support/CPP/limits.h"
15 #include "src/__support/CPP/type_traits.h"
16 #include "src/__support/FPUtil/FPBits.h"
17 #include "src/__support/FPUtil/dyadic_float.h"
18 #include "src/__support/big_int.h"
19 #include "src/__support/common.h"
20 #include "src/__support/libc_assert.h"
21 #include "src/__support/macros/attributes.h"
22 
23 // This file has 5 compile-time flags to allow the user to configure the float
24 // to string behavior. These were used to explore tradeoffs during the design
25 // phase, and can still be used to gain specific properties. Unless you
26 // specifically know what you're doing, you should leave all these flags off.
27 
28 // LIBC_COPT_FLOAT_TO_STR_NO_SPECIALIZE_LD
29 //  This flag disables the separate long double conversion implementation. It is
30 //  not based on the Ryu algorithm, instead generating the digits by
31 //  multiplying/dividing the written-out number by 10^9 to get blocks. It's
32 //  significantly faster than INT_CALC, only about 10x slower than MEGA_TABLE,
33 //  and is small in binary size. Its downside is that it always calculates all
34 //  of the digits above the decimal point, making it inefficient for %e calls
35 //  with large exponents. This specialization overrides other flags, so this
36 //  flag must be set for other flags to effect the long double behavior.
37 
38 // LIBC_COPT_FLOAT_TO_STR_USE_MEGA_LONG_DOUBLE_TABLE
39 //  The Mega Table is ~5 megabytes when compiled. It lists the constants needed
40 //  to perform the Ryu Printf algorithm (described below) for all long double
41 //  values. This makes it extremely fast for both doubles and long doubles, in
42 //  exchange for large binary size.
43 
44 // LIBC_COPT_FLOAT_TO_STR_USE_DYADIC_FLOAT
45 //  Dyadic floats are software floating point numbers, and their accuracy can be
46 //  as high as necessary. This option uses 256 bit dyadic floats to calculate
47 //  the table values that Ryu Printf needs. This is reasonably fast and very
48 //  small compared to the Mega Table, but the 256 bit floats only give accurate
49 //  results for the first ~50 digits of the output. In practice this shouldn't
50 //  be a problem since long doubles are only accurate for ~35 digits, but the
51 //  trailing values all being 0s may cause brittle tests to fail.
52 
53 // LIBC_COPT_FLOAT_TO_STR_USE_INT_CALC
54 //  Integer Calculation uses wide integers to do the calculations for the Ryu
55 //  Printf table, which is just as accurate as the Mega Table without requiring
56 //  as much code size. These integers can be very large (~32KB at max, though
57 //  always on the stack) to handle the edges of the long double range. They are
58 //  also very slow, taking multiple seconds on a powerful CPU to calculate the
59 //  values at the end of the range. If no flag is set, this is used for long
60 //  doubles, the flag only changes the double behavior.
61 
62 // LIBC_COPT_FLOAT_TO_STR_NO_TABLE
63 //  This flag doesn't change the actual calculation method, instead it is used
64 //  to disable the normal Ryu Printf table for configurations that don't use any
65 //  table at all.
66 
67 // Default Config:
68 //  If no flags are set, doubles use the normal (and much more reasonably sized)
69 //  Ryu Printf table and long doubles use their specialized implementation. This
70 //  provides good performance and binary size.
71 
72 #ifdef LIBC_COPT_FLOAT_TO_STR_USE_MEGA_LONG_DOUBLE_TABLE
73 #include "src/__support/ryu_long_double_constants.h"
74 #elif !defined(LIBC_COPT_FLOAT_TO_STR_NO_TABLE)
75 #include "src/__support/ryu_constants.h"
76 #else
77 constexpr size_t IDX_SIZE = 1;
78 constexpr size_t MID_INT_SIZE = 192;
79 #endif
80 
81 // This implementation is based on the Ryu Printf algorithm by Ulf Adams:
82 // Ulf Adams. 2019. Ryū revisited: printf floating point conversion.
83 // Proc. ACM Program. Lang. 3, OOPSLA, Article 169 (October 2019), 23 pages.
84 // https://doi.org/10.1145/3360595
85 
86 // This version is modified to require significantly less memory (it doesn't use
87 // a large buffer to store the result).
88 
89 // The general concept of this algorithm is as follows:
90 // We want to calculate a 9 digit segment of a floating point number using this
91 // formula: floor((mantissa * 2^exponent)/10^i) % 10^9.
92 // To do so normally would involve large integers (~1000 bits for doubles), so
93 // we use a shortcut. We can avoid calculating 2^exponent / 10^i by using a
94 // lookup table. The resulting intermediate value needs to be about 192 bits to
95 // store the result with enough precision. Since this is all being done with
96 // integers for appropriate precision, we would run into a problem if
97 // i > exponent since then 2^exponent / 10^i would be less than 1. To correct
98 // for this, the actual calculation done is 2^(exponent + c) / 10^i, and then
99 // when multiplying by the mantissa we reverse this by dividing by 2^c, like so:
100 // floor((mantissa * table[exponent][i])/(2^c)) % 10^9.
101 // This gives a 9 digit value, which is small enough to fit in a 32 bit integer,
102 // and that integer is converted into a string as normal, and called a block. In
103 // this implementation, the most recent block is buffered, so that if rounding
104 // is necessary the block can be adjusted before being written to the output.
105 // Any block that is all 9s adds one to the max block counter and doesn't clear
106 // the buffer because they can cause the block above them to be rounded up.
107 
108 namespace LIBC_NAMESPACE {
109 
110 using BlockInt = uint32_t;
111 constexpr uint32_t BLOCK_SIZE = 9;
112 constexpr uint64_t EXP5_9 = 1953125;
113 constexpr uint64_t EXP10_9 = 1000000000;
114 
115 using FPBits = fputil::FPBits<long double>;
116 
117 // Larger numbers prefer a slightly larger constant than is used for the smaller
118 // numbers.
119 constexpr size_t CALC_SHIFT_CONST = 128;
120 
121 namespace internal {
122 
123 // Returns floor(log_10(2^e)); requires 0 <= e <= 42039.
log10_pow2(uint64_t e)124 LIBC_INLINE constexpr uint32_t log10_pow2(uint64_t e) {
125   LIBC_ASSERT(e <= 42039 &&
126               "Incorrect exponent to perform log10_pow2 approximation.");
127   // This approximation is based on the float value for log_10(2). It first
128   // gives an incorrect result for our purposes at 42039 (well beyond the 16383
129   // maximum for long doubles).
130 
131   // To get these constants I first evaluated log_10(2) to get an approximation
132   // of 0.301029996. Next I passed that value through a string to double
133   // conversion to get an explicit mantissa of 0x13441350fbd738 and an exponent
134   // of -2 (which becomes -54 when we shift the mantissa to be a non-fractional
135   // number). Next I shifted the mantissa right 12 bits to create more space for
136   // the multiplication result, adding 12 to the exponent to compensate. To
137   // check that this approximation works for our purposes I used the following
138   // python code:
139   // for i in range(16384):
140   //   if(len(str(2**i)) != (((i*0x13441350fbd)>>42)+1)):
141   //     print(i)
142   // The reason we add 1 is because this evaluation truncates the result, giving
143   // us the floor, whereas counting the digits of the power of 2 gives us the
144   // ceiling. With a similar loop I checked the maximum valid value and found
145   // 42039.
146   return static_cast<uint32_t>((e * 0x13441350fbdll) >> 42);
147 }
148 
149 // Same as above, but with different constants.
log2_pow5(uint64_t e)150 LIBC_INLINE constexpr uint32_t log2_pow5(uint64_t e) {
151   return static_cast<uint32_t>((e * 0x12934f0979bll) >> 39);
152 }
153 
154 // Returns 1 + floor(log_10(2^e). This could technically be off by 1 if any
155 // power of 2 was also a power of 10, but since that doesn't exist this is
156 // always accurate. This is used to calculate the maximum number of base-10
157 // digits a given e-bit number could have.
ceil_log10_pow2(uint32_t e)158 LIBC_INLINE constexpr uint32_t ceil_log10_pow2(uint32_t e) {
159   return log10_pow2(e) + 1;
160 }
161 
div_ceil(uint32_t num,uint32_t denom)162 LIBC_INLINE constexpr uint32_t div_ceil(uint32_t num, uint32_t denom) {
163   return (num + (denom - 1)) / denom;
164 }
165 
166 // Returns the maximum number of 9 digit blocks a number described by the given
167 // index (which is ceil(exponent/16)) and mantissa width could need.
length_for_num(uint32_t idx,uint32_t mantissa_width)168 LIBC_INLINE constexpr uint32_t length_for_num(uint32_t idx,
169                                               uint32_t mantissa_width) {
170   return div_ceil(ceil_log10_pow2(idx) + ceil_log10_pow2(mantissa_width + 1),
171                   BLOCK_SIZE);
172 }
173 
174 // The formula for the table when i is positive (or zero) is as follows:
175 // floor(10^(-9i) * 2^(e + c_1) + 1) % (10^9 * 2^c_1)
176 // Rewritten slightly we get:
177 // floor(5^(-9i) * 2^(e + c_1 - 9i) + 1) % (10^9 * 2^c_1)
178 
179 // TODO: Fix long doubles (needs bigger table or alternate algorithm.)
180 // Currently the table values are generated, which is very slow.
181 template <size_t INT_SIZE>
get_table_positive(int exponent,size_t i)182 LIBC_INLINE constexpr UInt<MID_INT_SIZE> get_table_positive(int exponent,
183                                                             size_t i) {
184   // INT_SIZE is the size of int that is used for the internal calculations of
185   // this function. It should be large enough to hold 2^(exponent+constant), so
186   // ~1000 for double and ~16000 for long double. Be warned that the time
187   // complexity of exponentiation is O(n^2 * log_2(m)) where n is the number of
188   // bits in the number being exponentiated and m is the exponent.
189   const int shift_amount =
190       static_cast<int>(exponent + CALC_SHIFT_CONST - (BLOCK_SIZE * i));
191   if (shift_amount < 0) {
192     return 1;
193   }
194   UInt<INT_SIZE> num(0);
195   // MOD_SIZE is one of the limiting factors for how big the constant argument
196   // can get, since it needs to be small enough to fit in the result UInt,
197   // otherwise we'll get truncation on return.
198   constexpr UInt<INT_SIZE> MOD_SIZE =
199       (UInt<INT_SIZE>(EXP10_9)
200        << (CALC_SHIFT_CONST + (IDX_SIZE > 1 ? IDX_SIZE : 0)));
201 
202   num = UInt<INT_SIZE>(1) << (shift_amount);
203   if (i > 0) {
204     UInt<INT_SIZE> fives(EXP5_9);
205     fives.pow_n(i);
206     num = num / fives;
207   }
208 
209   num = num + 1;
210   if (num > MOD_SIZE) {
211     auto rem = num.div_uint_half_times_pow_2(
212                       EXP10_9, CALC_SHIFT_CONST + (IDX_SIZE > 1 ? IDX_SIZE : 0))
213                    .value();
214     num = rem;
215   }
216   return num;
217 }
218 
219 template <size_t INT_SIZE>
get_table_positive_df(int exponent,size_t i)220 LIBC_INLINE UInt<MID_INT_SIZE> get_table_positive_df(int exponent, size_t i) {
221   static_assert(INT_SIZE == 256,
222                 "Only 256 is supported as an int size right now.");
223   // This version uses dyadic floats with 256 bit mantissas to perform the same
224   // calculation as above. Due to floating point imprecision it is only accurate
225   // for the first 50 digits, but it's much faster. Since even 128 bit long
226   // doubles are only accurate to ~35 digits, the 50 digits of accuracy are
227   // enough for these floats to be converted back and forth safely. This is
228   // ideal for avoiding the size of the long double table.
229   const int shift_amount =
230       static_cast<int>(exponent + CALC_SHIFT_CONST - (9 * i));
231   if (shift_amount < 0) {
232     return 1;
233   }
234   fputil::DyadicFloat<INT_SIZE> num(false, 0, 1);
235   constexpr UInt<INT_SIZE> MOD_SIZE =
236       (UInt<INT_SIZE>(EXP10_9)
237        << (CALC_SHIFT_CONST + (IDX_SIZE > 1 ? IDX_SIZE : 0)));
238 
239   constexpr UInt<INT_SIZE> FIVE_EXP_MINUS_NINE_MANT{
240       {0xf387295d242602a7, 0xfdd7645e011abac9, 0x31680a88f8953030,
241        0x89705f4136b4a597}};
242 
243   static const fputil::DyadicFloat<INT_SIZE> FIVE_EXP_MINUS_NINE(
244       false, -276, FIVE_EXP_MINUS_NINE_MANT);
245 
246   if (i > 0) {
247     fputil::DyadicFloat<INT_SIZE> fives = fputil::pow_n(FIVE_EXP_MINUS_NINE, i);
248     num = fives;
249   }
250   num = mul_pow_2(num, shift_amount);
251 
252   // Adding one is part of the formula.
253   UInt<INT_SIZE> int_num = static_cast<UInt<INT_SIZE>>(num) + 1;
254   if (int_num > MOD_SIZE) {
255     auto rem =
256         int_num
257             .div_uint_half_times_pow_2(
258                 EXP10_9, CALC_SHIFT_CONST + (IDX_SIZE > 1 ? IDX_SIZE : 0))
259             .value();
260     int_num = rem;
261   }
262 
263   UInt<MID_INT_SIZE> result = int_num;
264 
265   return result;
266 }
267 
268 // The formula for the table when i is negative (or zero) is as follows:
269 // floor(10^(-9i) * 2^(c_0 - e)) % (10^9 * 2^c_0)
270 // Since we know i is always negative, we just take it as unsigned and treat it
271 // as negative. We do the same with exponent, while they're both always negative
272 // in theory, in practice they're converted to positive for simpler
273 // calculations.
274 // The formula being used looks more like this:
275 // floor(10^(9*(-i)) * 2^(c_0 + (-e))) % (10^9 * 2^c_0)
276 template <size_t INT_SIZE>
get_table_negative(int exponent,size_t i)277 LIBC_INLINE UInt<MID_INT_SIZE> get_table_negative(int exponent, size_t i) {
278   int shift_amount = CALC_SHIFT_CONST - exponent;
279   UInt<INT_SIZE> num(1);
280   constexpr UInt<INT_SIZE> MOD_SIZE =
281       (UInt<INT_SIZE>(EXP10_9)
282        << (CALC_SHIFT_CONST + (IDX_SIZE > 1 ? IDX_SIZE : 0)));
283 
284   size_t ten_blocks = i;
285   size_t five_blocks = 0;
286   if (shift_amount < 0) {
287     int block_shifts = (-shift_amount) / BLOCK_SIZE;
288     if (block_shifts < static_cast<int>(ten_blocks)) {
289       ten_blocks = ten_blocks - block_shifts;
290       five_blocks = block_shifts;
291       shift_amount = shift_amount + (block_shifts * BLOCK_SIZE);
292     } else {
293       ten_blocks = 0;
294       five_blocks = i;
295       shift_amount = shift_amount + (static_cast<int>(i) * BLOCK_SIZE);
296     }
297   }
298 
299   if (five_blocks > 0) {
300     UInt<INT_SIZE> fives(EXP5_9);
301     fives.pow_n(five_blocks);
302     num = fives;
303   }
304   if (ten_blocks > 0) {
305     UInt<INT_SIZE> tens(EXP10_9);
306     tens.pow_n(ten_blocks);
307     if (five_blocks <= 0) {
308       num = tens;
309     } else {
310       num *= tens;
311     }
312   }
313 
314   if (shift_amount > 0) {
315     num = num << shift_amount;
316   } else {
317     num = num >> (-shift_amount);
318   }
319   if (num > MOD_SIZE) {
320     auto rem = num.div_uint_half_times_pow_2(
321                       EXP10_9, CALC_SHIFT_CONST + (IDX_SIZE > 1 ? IDX_SIZE : 0))
322                    .value();
323     num = rem;
324   }
325   return num;
326 }
327 
328 template <size_t INT_SIZE>
get_table_negative_df(int exponent,size_t i)329 LIBC_INLINE UInt<MID_INT_SIZE> get_table_negative_df(int exponent, size_t i) {
330   static_assert(INT_SIZE == 256,
331                 "Only 256 is supported as an int size right now.");
332   // This version uses dyadic floats with 256 bit mantissas to perform the same
333   // calculation as above. Due to floating point imprecision it is only accurate
334   // for the first 50 digits, but it's much faster. Since even 128 bit long
335   // doubles are only accurate to ~35 digits, the 50 digits of accuracy are
336   // enough for these floats to be converted back and forth safely. This is
337   // ideal for avoiding the size of the long double table.
338 
339   int shift_amount = CALC_SHIFT_CONST - exponent;
340 
341   fputil::DyadicFloat<INT_SIZE> num(false, 0, 1);
342   constexpr UInt<INT_SIZE> MOD_SIZE =
343       (UInt<INT_SIZE>(EXP10_9)
344        << (CALC_SHIFT_CONST + (IDX_SIZE > 1 ? IDX_SIZE : 0)));
345 
346   constexpr UInt<INT_SIZE> TEN_EXP_NINE_MANT(EXP10_9);
347 
348   static const fputil::DyadicFloat<INT_SIZE> TEN_EXP_NINE(false, 0,
349                                                           TEN_EXP_NINE_MANT);
350 
351   if (i > 0) {
352     fputil::DyadicFloat<INT_SIZE> tens = fputil::pow_n(TEN_EXP_NINE, i);
353     num = tens;
354   }
355   num = mul_pow_2(num, shift_amount);
356 
357   UInt<INT_SIZE> int_num = static_cast<UInt<INT_SIZE>>(num);
358   if (int_num > MOD_SIZE) {
359     auto rem =
360         int_num
361             .div_uint_half_times_pow_2(
362                 EXP10_9, CALC_SHIFT_CONST + (IDX_SIZE > 1 ? IDX_SIZE : 0))
363             .value();
364     int_num = rem;
365   }
366 
367   UInt<MID_INT_SIZE> result = int_num;
368 
369   return result;
370 }
371 
fast_uint_mod_1e9(const UInt<MID_INT_SIZE> & val)372 LIBC_INLINE uint32_t fast_uint_mod_1e9(const UInt<MID_INT_SIZE> &val) {
373   // The formula for mult_const is:
374   //  1 + floor((2^(bits in target integer size + log_2(divider))) / divider)
375   // Where divider is 10^9 and target integer size is 128.
376   const UInt<MID_INT_SIZE> mult_const(
377       {0x31680A88F8953031u, 0x89705F4136B4A597u, 0});
378   const auto middle = (mult_const * val);
379   const uint64_t result = static_cast<uint64_t>(middle[2]);
380   const uint64_t shifted = result >> 29;
381   return static_cast<uint32_t>(static_cast<uint32_t>(val) -
382                                (EXP10_9 * shifted));
383 }
384 
mul_shift_mod_1e9(const FPBits::StorageType mantissa,const UInt<MID_INT_SIZE> & large,const int32_t shift_amount)385 LIBC_INLINE uint32_t mul_shift_mod_1e9(const FPBits::StorageType mantissa,
386                                        const UInt<MID_INT_SIZE> &large,
387                                        const int32_t shift_amount) {
388   UInt<MID_INT_SIZE + FPBits::STORAGE_LEN> val(large);
389   val = (val * mantissa) >> shift_amount;
390   return static_cast<uint32_t>(
391       val.div_uint_half_times_pow_2(static_cast<uint32_t>(EXP10_9), 0).value());
392 }
393 
394 } // namespace internal
395 
396 // Convert floating point values to their string representation.
397 // Because the result may not fit in a reasonably sized array, the caller must
398 // request blocks of digits and convert them from integers to strings themself.
399 // Blocks contain the most digits that can be stored in an BlockInt. This is 9
400 // digits for a 32 bit int and 18 digits for a 64 bit int.
401 // The intended use pattern is to create a FloatToString object of the
402 // appropriate type, then call get_positive_blocks to get an approximate number
403 // of blocks there are before the decimal point. Now the client code can start
404 // calling get_positive_block in a loop from the number of positive blocks to
405 // zero. This will give all digits before the decimal point. Then the user can
406 // start calling get_negative_block in a loop from 0 until the number of digits
407 // they need is reached. As an optimization, the client can use
408 // zero_blocks_after_point to find the number of blocks that are guaranteed to
409 // be zero after the decimal point and before the non-zero digits. Additionally,
410 // is_lowest_block will return if the current block is the lowest non-zero
411 // block.
412 template <typename T, cpp::enable_if_t<cpp::is_floating_point_v<T>, int> = 0>
413 class FloatToString {
414   fputil::FPBits<T> float_bits;
415   int exponent;
416   FPBits::StorageType mantissa;
417 
418   static constexpr int FRACTION_LEN = fputil::FPBits<T>::FRACTION_LEN;
419   static constexpr int EXP_BIAS = fputil::FPBits<T>::EXP_BIAS;
420 
421 public:
FloatToString(T init_float)422   LIBC_INLINE constexpr FloatToString(T init_float) : float_bits(init_float) {
423     exponent = float_bits.get_explicit_exponent();
424     mantissa = float_bits.get_explicit_mantissa();
425 
426     // Adjust for the width of the mantissa.
427     exponent -= FRACTION_LEN;
428   }
429 
is_nan()430   LIBC_INLINE constexpr bool is_nan() { return float_bits.is_nan(); }
is_inf()431   LIBC_INLINE constexpr bool is_inf() { return float_bits.is_inf(); }
is_inf_or_nan()432   LIBC_INLINE constexpr bool is_inf_or_nan() {
433     return float_bits.is_inf_or_nan();
434   }
435 
436   // get_block returns an integer that represents the digits in the requested
437   // block.
get_positive_block(int block_index)438   LIBC_INLINE constexpr BlockInt get_positive_block(int block_index) {
439     if (exponent >= -FRACTION_LEN) {
440       // idx is ceil(exponent/16) or 0 if exponent is negative. This is used to
441       // find the coarse section of the POW10_SPLIT table that will be used to
442       // calculate the 9 digit window, as well as some other related values.
443       const uint32_t idx =
444           exponent < 0
445               ? 0
446               : static_cast<uint32_t>(exponent + (IDX_SIZE - 1)) / IDX_SIZE;
447 
448       // shift_amount = -(c0 - exponent) = c_0 + 16 * ceil(exponent/16) -
449       // exponent
450 
451       const uint32_t pos_exp = idx * IDX_SIZE;
452 
453       UInt<MID_INT_SIZE> val;
454 
455 #if defined(LIBC_COPT_FLOAT_TO_STR_USE_DYADIC_FLOAT)
456       // ----------------------- DYADIC FLOAT CALC MODE ------------------------
457       const int32_t SHIFT_CONST = CALC_SHIFT_CONST;
458       val = internal::get_table_positive_df<256>(IDX_SIZE * idx, block_index);
459 #elif defined(LIBC_COPT_FLOAT_TO_STR_USE_INT_CALC)
460 
461       // ---------------------------- INT CALC MODE ----------------------------
462       const int32_t SHIFT_CONST = CALC_SHIFT_CONST;
463       const uint64_t MAX_POW_2_SIZE =
464           pos_exp + CALC_SHIFT_CONST - (BLOCK_SIZE * block_index);
465       const uint64_t MAX_POW_5_SIZE =
466           internal::log2_pow5(BLOCK_SIZE * block_index);
467       const uint64_t MAX_INT_SIZE =
468           (MAX_POW_2_SIZE > MAX_POW_5_SIZE) ? MAX_POW_2_SIZE : MAX_POW_5_SIZE;
469 
470       if (MAX_INT_SIZE < 1024) {
471         val = internal::get_table_positive<1024>(pos_exp, block_index);
472       } else if (MAX_INT_SIZE < 2048) {
473         val = internal::get_table_positive<2048>(pos_exp, block_index);
474       } else if (MAX_INT_SIZE < 4096) {
475         val = internal::get_table_positive<4096>(pos_exp, block_index);
476       } else if (MAX_INT_SIZE < 8192) {
477         val = internal::get_table_positive<8192>(pos_exp, block_index);
478       } else if (MAX_INT_SIZE < 16384) {
479         val = internal::get_table_positive<16384>(pos_exp, block_index);
480       } else {
481         val = internal::get_table_positive<16384 + 128>(pos_exp, block_index);
482       }
483 #else
484       // ----------------------------- TABLE MODE ------------------------------
485       const int32_t SHIFT_CONST = TABLE_SHIFT_CONST;
486 
487       val = POW10_SPLIT[POW10_OFFSET[idx] + block_index];
488 #endif
489       const uint32_t shift_amount = SHIFT_CONST + pos_exp - exponent;
490 
491       const BlockInt digits =
492           internal::mul_shift_mod_1e9(mantissa, val, (int32_t)(shift_amount));
493       return digits;
494     } else {
495       return 0;
496     }
497   }
498 
get_negative_block(int block_index)499   LIBC_INLINE constexpr BlockInt get_negative_block(int block_index) {
500     if (exponent < 0) {
501       const int32_t idx = -exponent / IDX_SIZE;
502 
503       UInt<MID_INT_SIZE> val;
504 
505       const uint32_t pos_exp = static_cast<uint32_t>(idx * IDX_SIZE);
506 
507 #if defined(LIBC_COPT_FLOAT_TO_STR_USE_DYADIC_FLOAT)
508       // ----------------------- DYADIC FLOAT CALC MODE ------------------------
509       const int32_t SHIFT_CONST = CALC_SHIFT_CONST;
510       val = internal::get_table_negative_df<256>(pos_exp, block_index + 1);
511 #elif defined(LIBC_COPT_FLOAT_TO_STR_USE_INT_CALC)
512       // ---------------------------- INT CALC MODE ----------------------------
513       const int32_t SHIFT_CONST = CALC_SHIFT_CONST;
514 
515       const uint64_t NUM_FIVES = (block_index + 1) * BLOCK_SIZE;
516       // Round MAX_INT_SIZE up to the nearest 64 (adding 1 because log2_pow5
517       // implicitly rounds down).
518       const uint64_t MAX_INT_SIZE =
519           ((internal::log2_pow5(NUM_FIVES) / 64) + 1) * 64;
520 
521       if (MAX_INT_SIZE < 1024) {
522         val = internal::get_table_negative<1024>(pos_exp, block_index + 1);
523       } else if (MAX_INT_SIZE < 2048) {
524         val = internal::get_table_negative<2048>(pos_exp, block_index + 1);
525       } else if (MAX_INT_SIZE < 4096) {
526         val = internal::get_table_negative<4096>(pos_exp, block_index + 1);
527       } else if (MAX_INT_SIZE < 8192) {
528         val = internal::get_table_negative<8192>(pos_exp, block_index + 1);
529       } else if (MAX_INT_SIZE < 16384) {
530         val = internal::get_table_negative<16384>(pos_exp, block_index + 1);
531       } else {
532         val = internal::get_table_negative<16384 + 8192>(pos_exp,
533                                                          block_index + 1);
534       }
535 #else
536       // ----------------------------- TABLE MODE ------------------------------
537       // if the requested block is zero
538       const int32_t SHIFT_CONST = TABLE_SHIFT_CONST;
539       if (block_index < MIN_BLOCK_2[idx]) {
540         return 0;
541       }
542       const uint32_t p = POW10_OFFSET_2[idx] + block_index - MIN_BLOCK_2[idx];
543       // If every digit after the requested block is zero.
544       if (p >= POW10_OFFSET_2[idx + 1]) {
545         return 0;
546       }
547 
548       val = POW10_SPLIT_2[p];
549 #endif
550       const int32_t shift_amount =
551           SHIFT_CONST + (-exponent - static_cast<int32_t>(pos_exp));
552       BlockInt digits =
553           internal::mul_shift_mod_1e9(mantissa, val, shift_amount);
554       return digits;
555     } else {
556       return 0;
557     }
558   }
559 
get_block(int block_index)560   LIBC_INLINE constexpr BlockInt get_block(int block_index) {
561     if (block_index >= 0) {
562       return get_positive_block(block_index);
563     } else {
564       return get_negative_block(-1 - block_index);
565     }
566   }
567 
get_positive_blocks()568   LIBC_INLINE constexpr size_t get_positive_blocks() {
569     if (exponent < -FRACTION_LEN)
570       return 0;
571     const uint32_t idx =
572         exponent < 0
573             ? 0
574             : static_cast<uint32_t>(exponent + (IDX_SIZE - 1)) / IDX_SIZE;
575     return internal::length_for_num(idx * IDX_SIZE, FRACTION_LEN);
576   }
577 
578   // This takes the index of a block after the decimal point (a negative block)
579   // and return if it's sure that all of the digits after it are zero.
is_lowest_block(size_t negative_block_index)580   LIBC_INLINE constexpr bool is_lowest_block(size_t negative_block_index) {
581 #ifdef LIBC_COPT_FLOAT_TO_STR_NO_TABLE
582     // The decimal representation of 2**(-i) will have exactly i digits after
583     // the decimal point.
584     int num_requested_digits =
585         static_cast<int>((negative_block_index + 1) * BLOCK_SIZE);
586 
587     return num_requested_digits > -exponent;
588 #else
589     const int32_t idx = -exponent / IDX_SIZE;
590     const size_t p =
591         POW10_OFFSET_2[idx] + negative_block_index - MIN_BLOCK_2[idx];
592     // If the remaining digits are all 0, then this is the lowest block.
593     return p >= POW10_OFFSET_2[idx + 1];
594 #endif
595   }
596 
zero_blocks_after_point()597   LIBC_INLINE constexpr size_t zero_blocks_after_point() {
598 #ifdef LIBC_COPT_FLOAT_TO_STR_NO_TABLE
599     if (exponent < -FRACTION_LEN) {
600       const int pos_exp = -exponent - 1;
601       const uint32_t pos_idx =
602           static_cast<uint32_t>(pos_exp + (IDX_SIZE - 1)) / IDX_SIZE;
603       const int32_t pos_len = ((internal::ceil_log10_pow2(pos_idx * IDX_SIZE) -
604                                 internal::ceil_log10_pow2(FRACTION_LEN + 1)) /
605                                BLOCK_SIZE) -
606                               1;
607       return static_cast<uint32_t>(pos_len > 0 ? pos_len : 0);
608     }
609     return 0;
610 #else
611     return MIN_BLOCK_2[-exponent / IDX_SIZE];
612 #endif
613   }
614 };
615 
616 #if !defined(LIBC_TYPES_LONG_DOUBLE_IS_FLOAT64) &&                             \
617     !defined(LIBC_COPT_FLOAT_TO_STR_NO_SPECIALIZE_LD)
618 // --------------------------- LONG DOUBLE FUNCTIONS ---------------------------
619 
620 // this algorithm will work exactly the same for 80 bit and 128 bit long
621 // doubles. They have the same max exponent, but even if they didn't the
622 // constants should be calculated to be correct for any provided floating point
623 // type.
624 
625 template <> class FloatToString<long double> {
626   fputil::FPBits<long double> float_bits;
627   bool is_negative = 0;
628   int exponent = 0;
629   FPBits::StorageType mantissa = 0;
630 
631   static constexpr int FRACTION_LEN = fputil::FPBits<long double>::FRACTION_LEN;
632   static constexpr int EXP_BIAS = fputil::FPBits<long double>::EXP_BIAS;
633   static constexpr size_t UINT_WORD_SIZE = 64;
634 
635   static constexpr size_t FLOAT_AS_INT_WIDTH =
636       internal::div_ceil(fputil::FPBits<long double>::MAX_BIASED_EXPONENT -
637                              FPBits::EXP_BIAS,
638                          UINT_WORD_SIZE) *
639       UINT_WORD_SIZE;
640   static constexpr size_t EXTRA_INT_WIDTH =
641       internal::div_ceil(sizeof(long double) * CHAR_BIT, UINT_WORD_SIZE) *
642       UINT_WORD_SIZE;
643 
644   using wide_int = UInt<FLOAT_AS_INT_WIDTH + EXTRA_INT_WIDTH>;
645 
646   // float_as_fixed represents the floating point number as a fixed point number
647   // with the point EXTRA_INT_WIDTH bits from the left of the number. This can
648   // store any number with a negative exponent.
649   wide_int float_as_fixed = 0;
650   int int_block_index = 0;
651 
652   static constexpr size_t BLOCK_BUFFER_LEN =
653       internal::div_ceil(internal::log10_pow2(FLOAT_AS_INT_WIDTH), BLOCK_SIZE) +
654       1;
655   BlockInt block_buffer[BLOCK_BUFFER_LEN] = {0};
656   size_t block_buffer_valid = 0;
657 
658   template <size_t Bits>
grab_digits(UInt<Bits> & int_num)659   LIBC_INLINE static constexpr BlockInt grab_digits(UInt<Bits> &int_num) {
660     auto wide_result = int_num.div_uint_half_times_pow_2(EXP5_9, 9);
661     // the optional only comes into effect when dividing by 0, which will
662     // never happen here. Thus, we just assert that it has value.
663     LIBC_ASSERT(wide_result.has_value());
664     return static_cast<BlockInt>(wide_result.value());
665   }
666 
zero_leading_digits(wide_int & int_num)667   LIBC_INLINE static constexpr void zero_leading_digits(wide_int &int_num) {
668     // WORD_SIZE is the width of the numbers used to internally represent the
669     // UInt
670     for (size_t i = 0; i < EXTRA_INT_WIDTH / wide_int::WORD_SIZE; ++i)
671       int_num[i + (FLOAT_AS_INT_WIDTH / wide_int::WORD_SIZE)] = 0;
672   }
673 
674   // init_convert initializes float_as_int, cur_block, and block_buffer based on
675   // the mantissa and exponent of the initial number. Calling it will always
676   // return the class to the starting state.
init_convert()677   LIBC_INLINE constexpr void init_convert() {
678     // No calculation necessary for the 0 case.
679     if (mantissa == 0 && exponent == 0)
680       return;
681 
682     if (exponent > 0) {
683       // if the exponent is positive, then the number is fully above the decimal
684       // point. In this case we represent the float as an integer, then divide
685       // by 10^BLOCK_SIZE and take the remainder as our next block. This
686       // generates the digits from right to left, but the digits will be written
687       // from left to right, so it caches the results so they can be read in
688       // reverse order.
689 
690       wide_int float_as_int = mantissa;
691 
692       float_as_int <<= exponent;
693       int_block_index = 0;
694 
695       while (float_as_int > 0) {
696         LIBC_ASSERT(int_block_index < static_cast<int>(BLOCK_BUFFER_LEN));
697         block_buffer[int_block_index] =
698             grab_digits<FLOAT_AS_INT_WIDTH + EXTRA_INT_WIDTH>(float_as_int);
699         ++int_block_index;
700       }
701       block_buffer_valid = int_block_index;
702 
703     } else {
704       // if the exponent is not positive, then the number is at least partially
705       // below the decimal point. In this case we represent the float as a fixed
706       // point number with the decimal point after the top EXTRA_INT_WIDTH bits.
707       float_as_fixed = mantissa;
708 
709       const int SHIFT_AMOUNT = FLOAT_AS_INT_WIDTH + exponent;
710       static_assert(EXTRA_INT_WIDTH >= sizeof(long double) * 8);
711       float_as_fixed <<= SHIFT_AMOUNT;
712 
713       // If there are still digits above the decimal point, handle those.
714       if (cpp::countl_zero(float_as_fixed) <
715           static_cast<int>(EXTRA_INT_WIDTH)) {
716         UInt<EXTRA_INT_WIDTH> above_decimal_point =
717             float_as_fixed >> FLOAT_AS_INT_WIDTH;
718 
719         size_t positive_int_block_index = 0;
720         while (above_decimal_point > 0) {
721           block_buffer[positive_int_block_index] =
722               grab_digits<EXTRA_INT_WIDTH>(above_decimal_point);
723           ++positive_int_block_index;
724         }
725         block_buffer_valid = positive_int_block_index;
726 
727         // Zero all digits above the decimal point.
728         zero_leading_digits(float_as_fixed);
729         int_block_index = 0;
730       }
731     }
732   }
733 
734 public:
FloatToString(long double init_float)735   LIBC_INLINE constexpr FloatToString(long double init_float)
736       : float_bits(init_float) {
737     is_negative = float_bits.is_neg();
738     exponent = float_bits.get_explicit_exponent();
739     mantissa = float_bits.get_explicit_mantissa();
740 
741     // Adjust for the width of the mantissa.
742     exponent -= FRACTION_LEN;
743 
744     this->init_convert();
745   }
746 
get_positive_blocks()747   LIBC_INLINE constexpr size_t get_positive_blocks() {
748     if (exponent < -FRACTION_LEN)
749       return 0;
750 
751     const uint32_t idx =
752         exponent < 0
753             ? 0
754             : static_cast<uint32_t>(exponent + (IDX_SIZE - 1)) / IDX_SIZE;
755     return internal::length_for_num(idx * IDX_SIZE, FRACTION_LEN);
756   }
757 
zero_blocks_after_point()758   LIBC_INLINE constexpr size_t zero_blocks_after_point() {
759 #ifdef LIBC_COPT_FLOAT_TO_STR_USE_MEGA_LONG_DOUBLE_TABLE
760     return MIN_BLOCK_2[-exponent / IDX_SIZE];
761 #else
762     if (exponent >= -FRACTION_LEN)
763       return 0;
764 
765     const int pos_exp = -exponent - 1;
766     const uint32_t pos_idx =
767         static_cast<uint32_t>(pos_exp + (IDX_SIZE - 1)) / IDX_SIZE;
768     const int32_t pos_len = ((internal::ceil_log10_pow2(pos_idx * IDX_SIZE) -
769                               internal::ceil_log10_pow2(FRACTION_LEN + 1)) /
770                              BLOCK_SIZE) -
771                             1;
772     return static_cast<uint32_t>(pos_len > 0 ? pos_len : 0);
773 #endif
774   }
775 
is_lowest_block(size_t negative_block_index)776   LIBC_INLINE constexpr bool is_lowest_block(size_t negative_block_index) {
777     // The decimal representation of 2**(-i) will have exactly i digits after
778     // the decimal point.
779     const int num_requested_digits =
780         static_cast<int>((negative_block_index + 1) * BLOCK_SIZE);
781 
782     return num_requested_digits > -exponent;
783   }
784 
get_positive_block(int block_index)785   LIBC_INLINE constexpr BlockInt get_positive_block(int block_index) {
786     if (exponent < -FRACTION_LEN)
787       return 0;
788     if (block_index > static_cast<int>(block_buffer_valid) || block_index < 0)
789       return 0;
790 
791     LIBC_ASSERT(block_index < static_cast<int>(BLOCK_BUFFER_LEN));
792 
793     return block_buffer[block_index];
794   }
795 
get_negative_block(int negative_block_index)796   LIBC_INLINE constexpr BlockInt get_negative_block(int negative_block_index) {
797     if (exponent >= 0)
798       return 0;
799 
800     // negative_block_index starts at 0 with the first block after the decimal
801     // point, and 1 with the second and so on. This converts to the same
802     // block_index used everywhere else.
803 
804     const int block_index = -1 - negative_block_index;
805 
806     // If we're currently after the requested block (remember these are
807     // negative indices) we reset the number to the start. This is only
808     // likely to happen in %g calls. This will also reset int_block_index.
809     // if (block_index > int_block_index) {
810     //   init_convert();
811     // }
812 
813     // Printf is the only existing user of this code and it will only ever move
814     // downwards, except for %g but that currently creates a second
815     // float_to_string object so this assertion still holds. If a new user needs
816     // the ability to step backwards, uncomment the code above.
817     LIBC_ASSERT(block_index <= int_block_index);
818 
819     // If we are currently before the requested block. Step until we reach the
820     // requested block. This is likely to only be one step.
821     while (block_index < int_block_index) {
822       zero_leading_digits(float_as_fixed);
823       float_as_fixed.mul(EXP10_9);
824       --int_block_index;
825     }
826 
827     // We're now on the requested block, return the current block.
828     return static_cast<BlockInt>(float_as_fixed >> FLOAT_AS_INT_WIDTH);
829   }
830 
get_block(int block_index)831   LIBC_INLINE constexpr BlockInt get_block(int block_index) {
832     if (block_index >= 0)
833       return get_positive_block(block_index);
834 
835     return get_negative_block(-1 - block_index);
836   }
837 };
838 
839 #endif // !LIBC_TYPES_LONG_DOUBLE_IS_FLOAT64 &&
840        // !LIBC_COPT_FLOAT_TO_STR_NO_SPECIALIZE_LD
841 
842 } // namespace LIBC_NAMESPACE
843 
844 #endif // LLVM_LIBC_SRC___SUPPORT_FLOAT_TO_STRING_H
845