• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===-- High Precision Decimal ----------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See httpss//llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_LIBC_SRC___SUPPORT_HIGH_PRECISION_DECIMAL_H
10 #define LLVM_LIBC_SRC___SUPPORT_HIGH_PRECISION_DECIMAL_H
11 
12 #include "src/__support/CPP/limits.h"
13 #include "src/__support/ctype_utils.h"
14 #include "src/__support/str_to_integer.h"
15 #include <stdint.h>
16 
17 namespace LIBC_NAMESPACE {
18 namespace internal {
19 
20 struct LShiftTableEntry {
21   uint32_t new_digits;
22   char const *power_of_five;
23 };
24 
25 // This is used in both this file and in the main str_to_float.h.
26 // TODO: Figure out where to put this.
27 enum class RoundDirection { Up, Down, Nearest };
28 
29 // This is based on the HPD data structure described as part of the Simple
30 // Decimal Conversion algorithm by Nigel Tao, described at this link:
31 // https://nigeltao.github.io/blog/2020/parse-number-f64-simple.html
32 class HighPrecisionDecimal {
33 
34   // This precomputed table speeds up left shifts by having the number of new
35   // digits that will be added by multiplying 5^i by 2^i. If the number is less
36   // than 5^i then it will add one fewer digit. There are only 60 entries since
37   // that's the max shift amount.
38   // This table was generated by the script at
39   // libc/utils/mathtools/GenerateHPDConstants.py
40   static constexpr LShiftTableEntry LEFT_SHIFT_DIGIT_TABLE[] = {
41       {0, ""},
42       {1, "5"},
43       {1, "25"},
44       {1, "125"},
45       {2, "625"},
46       {2, "3125"},
47       {2, "15625"},
48       {3, "78125"},
49       {3, "390625"},
50       {3, "1953125"},
51       {4, "9765625"},
52       {4, "48828125"},
53       {4, "244140625"},
54       {4, "1220703125"},
55       {5, "6103515625"},
56       {5, "30517578125"},
57       {5, "152587890625"},
58       {6, "762939453125"},
59       {6, "3814697265625"},
60       {6, "19073486328125"},
61       {7, "95367431640625"},
62       {7, "476837158203125"},
63       {7, "2384185791015625"},
64       {7, "11920928955078125"},
65       {8, "59604644775390625"},
66       {8, "298023223876953125"},
67       {8, "1490116119384765625"},
68       {9, "7450580596923828125"},
69       {9, "37252902984619140625"},
70       {9, "186264514923095703125"},
71       {10, "931322574615478515625"},
72       {10, "4656612873077392578125"},
73       {10, "23283064365386962890625"},
74       {10, "116415321826934814453125"},
75       {11, "582076609134674072265625"},
76       {11, "2910383045673370361328125"},
77       {11, "14551915228366851806640625"},
78       {12, "72759576141834259033203125"},
79       {12, "363797880709171295166015625"},
80       {12, "1818989403545856475830078125"},
81       {13, "9094947017729282379150390625"},
82       {13, "45474735088646411895751953125"},
83       {13, "227373675443232059478759765625"},
84       {13, "1136868377216160297393798828125"},
85       {14, "5684341886080801486968994140625"},
86       {14, "28421709430404007434844970703125"},
87       {14, "142108547152020037174224853515625"},
88       {15, "710542735760100185871124267578125"},
89       {15, "3552713678800500929355621337890625"},
90       {15, "17763568394002504646778106689453125"},
91       {16, "88817841970012523233890533447265625"},
92       {16, "444089209850062616169452667236328125"},
93       {16, "2220446049250313080847263336181640625"},
94       {16, "11102230246251565404236316680908203125"},
95       {17, "55511151231257827021181583404541015625"},
96       {17, "277555756156289135105907917022705078125"},
97       {17, "1387778780781445675529539585113525390625"},
98       {18, "6938893903907228377647697925567626953125"},
99       {18, "34694469519536141888238489627838134765625"},
100       {18, "173472347597680709441192448139190673828125"},
101       {19, "867361737988403547205962240695953369140625"},
102   };
103 
104   // The maximum amount we can shift is the number of bits used in the
105   // accumulator, minus the number of bits needed to represent the base (in this
106   // case 4).
107   static constexpr uint32_t MAX_SHIFT_AMOUNT = sizeof(uint64_t) - 4;
108 
109   // 800 is an arbitrary number of digits, but should be
110   // large enough for any practical number.
111   static constexpr uint32_t MAX_NUM_DIGITS = 800;
112 
113   uint32_t num_digits = 0;
114   int32_t decimal_point = 0;
115   bool truncated = false;
116   uint8_t digits[MAX_NUM_DIGITS];
117 
118 private:
should_round_up(int32_t round_to_digit,RoundDirection round)119   LIBC_INLINE bool should_round_up(int32_t round_to_digit,
120                                    RoundDirection round) {
121     if (round_to_digit < 0 ||
122         static_cast<uint32_t>(round_to_digit) >= this->num_digits) {
123       return false;
124     }
125 
126     // The above condition handles all cases where all of the trailing digits
127     // are zero. In that case, if the rounding mode is up, then this number
128     // should be rounded up. Similarly, if the rounding mode is down, then it
129     // should always round down.
130     if (round == RoundDirection::Up) {
131       return true;
132     } else if (round == RoundDirection::Down) {
133       return false;
134     }
135     // Else round to nearest.
136 
137     // If we're right in the middle and there are no extra digits
138     if (this->digits[round_to_digit] == 5 &&
139         static_cast<uint32_t>(round_to_digit + 1) == this->num_digits) {
140 
141       // Round up if we've truncated (since that means the result is slightly
142       // higher than what's represented.)
143       if (this->truncated) {
144         return true;
145       }
146 
147       // If this exactly halfway, round to even.
148       if (round_to_digit == 0)
149         // When the input is ".5".
150         return false;
151       return this->digits[round_to_digit - 1] % 2 != 0;
152     }
153     // If there are digits after round_to_digit, they must be non-zero since we
154     // trim trailing zeroes after all operations that change digits.
155     return this->digits[round_to_digit] >= 5;
156   }
157 
158   // Takes an amount to left shift and returns the number of new digits needed
159   // to store the result based on LEFT_SHIFT_DIGIT_TABLE.
get_num_new_digits(uint32_t lshift_amount)160   LIBC_INLINE uint32_t get_num_new_digits(uint32_t lshift_amount) {
161     const char *power_of_five =
162         LEFT_SHIFT_DIGIT_TABLE[lshift_amount].power_of_five;
163     uint32_t new_digits = LEFT_SHIFT_DIGIT_TABLE[lshift_amount].new_digits;
164     uint32_t digit_index = 0;
165     while (power_of_five[digit_index] != 0) {
166       if (digit_index >= this->num_digits) {
167         return new_digits - 1;
168       }
169       if (this->digits[digit_index] != power_of_five[digit_index] - '0') {
170         return new_digits -
171                ((this->digits[digit_index] < power_of_five[digit_index] - '0')
172                     ? 1
173                     : 0);
174       }
175       ++digit_index;
176     }
177     return new_digits;
178   }
179 
180   // Trim all trailing 0s
trim_trailing_zeroes()181   LIBC_INLINE void trim_trailing_zeroes() {
182     while (this->num_digits > 0 && this->digits[this->num_digits - 1] == 0) {
183       --this->num_digits;
184     }
185     if (this->num_digits == 0) {
186       this->decimal_point = 0;
187     }
188   }
189 
190   // Perform a digitwise binary non-rounding right shift on this value by
191   // shift_amount. The shift_amount can't be more than MAX_SHIFT_AMOUNT to
192   // prevent overflow.
right_shift(uint32_t shift_amount)193   LIBC_INLINE void right_shift(uint32_t shift_amount) {
194     uint32_t read_index = 0;
195     uint32_t write_index = 0;
196 
197     uint64_t accumulator = 0;
198 
199     const uint64_t shift_mask = (uint64_t(1) << shift_amount) - 1;
200 
201     // Warm Up phase: we don't have enough digits to start writing, so just
202     // read them into the accumulator.
203     while (accumulator >> shift_amount == 0) {
204       uint64_t read_digit = 0;
205       // If there are still digits to read, read the next one, else the digit is
206       // assumed to be 0.
207       if (read_index < this->num_digits) {
208         read_digit = this->digits[read_index];
209       }
210       accumulator = accumulator * 10 + read_digit;
211       ++read_index;
212     }
213 
214     // Shift the decimal point by the number of digits it took to fill the
215     // accumulator.
216     this->decimal_point -= read_index - 1;
217 
218     // Middle phase: we have enough digits to write, as well as more digits to
219     // read. Keep reading until we run out of digits.
220     while (read_index < this->num_digits) {
221       uint64_t read_digit = this->digits[read_index];
222       uint64_t write_digit = accumulator >> shift_amount;
223       accumulator &= shift_mask;
224       this->digits[write_index] = static_cast<uint8_t>(write_digit);
225       accumulator = accumulator * 10 + read_digit;
226       ++read_index;
227       ++write_index;
228     }
229 
230     // Cool Down phase: All of the readable digits have been read, so just write
231     // the remainder, while treating any more digits as 0.
232     while (accumulator > 0) {
233       uint64_t write_digit = accumulator >> shift_amount;
234       accumulator &= shift_mask;
235       if (write_index < MAX_NUM_DIGITS) {
236         this->digits[write_index] = static_cast<uint8_t>(write_digit);
237         ++write_index;
238       } else if (write_digit > 0) {
239         this->truncated = true;
240       }
241       accumulator = accumulator * 10;
242     }
243     this->num_digits = write_index;
244     this->trim_trailing_zeroes();
245   }
246 
247   // Perform a digitwise binary non-rounding left shift on this value by
248   // shift_amount. The shift_amount can't be more than MAX_SHIFT_AMOUNT to
249   // prevent overflow.
left_shift(uint32_t shift_amount)250   LIBC_INLINE void left_shift(uint32_t shift_amount) {
251     uint32_t new_digits = this->get_num_new_digits(shift_amount);
252 
253     int32_t read_index = this->num_digits - 1;
254     uint32_t write_index = this->num_digits + new_digits;
255 
256     uint64_t accumulator = 0;
257 
258     // No Warm Up phase. Since we're putting digits in at the top and taking
259     // digits from the bottom we don't have to wait for the accumulator to fill.
260 
261     // Middle phase: while we have more digits to read, keep reading as well as
262     // writing.
263     while (read_index >= 0) {
264       accumulator += static_cast<uint64_t>(this->digits[read_index])
265                      << shift_amount;
266       uint64_t next_accumulator = accumulator / 10;
267       uint64_t write_digit = accumulator - (10 * next_accumulator);
268       --write_index;
269       if (write_index < MAX_NUM_DIGITS) {
270         this->digits[write_index] = static_cast<uint8_t>(write_digit);
271       } else if (write_digit != 0) {
272         this->truncated = true;
273       }
274       accumulator = next_accumulator;
275       --read_index;
276     }
277 
278     // Cool Down phase: there are no more digits to read, so just write the
279     // remaining digits in the accumulator.
280     while (accumulator > 0) {
281       uint64_t next_accumulator = accumulator / 10;
282       uint64_t write_digit = accumulator - (10 * next_accumulator);
283       --write_index;
284       if (write_index < MAX_NUM_DIGITS) {
285         this->digits[write_index] = static_cast<uint8_t>(write_digit);
286       } else if (write_digit != 0) {
287         this->truncated = true;
288       }
289       accumulator = next_accumulator;
290     }
291 
292     this->num_digits += new_digits;
293     if (this->num_digits > MAX_NUM_DIGITS) {
294       this->num_digits = MAX_NUM_DIGITS;
295     }
296     this->decimal_point += new_digits;
297     this->trim_trailing_zeroes();
298   }
299 
300 public:
301   // num_string is assumed to be a string of numeric characters. It doesn't
302   // handle leading spaces.
303   LIBC_INLINE
304   HighPrecisionDecimal(
305       const char *__restrict num_string,
306       const size_t num_len = cpp::numeric_limits<size_t>::max()) {
307     bool saw_dot = false;
308     size_t num_cur = 0;
309     // This counts the digits in the number, even if there isn't space to store
310     // them all.
311     uint32_t total_digits = 0;
312     while (num_cur < num_len &&
313            (isdigit(num_string[num_cur]) || num_string[num_cur] == '.')) {
314       if (num_string[num_cur] == '.') {
315         if (saw_dot) {
316           break;
317         }
318         this->decimal_point = total_digits;
319         saw_dot = true;
320       } else {
321         if (num_string[num_cur] == '0' && this->num_digits == 0) {
322           --this->decimal_point;
323           ++num_cur;
324           continue;
325         }
326         ++total_digits;
327         if (this->num_digits < MAX_NUM_DIGITS) {
328           this->digits[this->num_digits] =
329               static_cast<uint8_t>(num_string[num_cur] - '0');
330           ++this->num_digits;
331         } else if (num_string[num_cur] != '0') {
332           this->truncated = true;
333         }
334       }
335       ++num_cur;
336     }
337 
338     if (!saw_dot)
339       this->decimal_point = total_digits;
340 
341     if (num_cur < num_len && ((num_string[num_cur] | 32) == 'e')) {
342       ++num_cur;
343       if (isdigit(num_string[num_cur]) || num_string[num_cur] == '+' ||
344           num_string[num_cur] == '-') {
345         auto result =
346             strtointeger<int32_t>(num_string + num_cur, 10, num_len - num_cur);
347         if (result.has_error()) {
348           // TODO: handle error
349         }
350         int32_t add_to_exponent = result.value;
351 
352         // Here we do this operation as int64 to avoid overflow.
353         int64_t temp_exponent = static_cast<int64_t>(this->decimal_point) +
354                                 static_cast<int64_t>(add_to_exponent);
355 
356         // Theoretically these numbers should be MAX_BIASED_EXPONENT for long
357         // double, but that should be ~16,000 which is much less than 1 << 30.
358         if (temp_exponent > (1 << 30)) {
359           temp_exponent = (1 << 30);
360         } else if (temp_exponent < -(1 << 30)) {
361           temp_exponent = -(1 << 30);
362         }
363         this->decimal_point = static_cast<int32_t>(temp_exponent);
364       }
365     }
366 
367     this->trim_trailing_zeroes();
368   }
369 
370   // Binary shift left (shift_amount > 0) or right (shift_amount < 0)
shift(int shift_amount)371   LIBC_INLINE void shift(int shift_amount) {
372     if (shift_amount == 0) {
373       return;
374     }
375     // Left
376     else if (shift_amount > 0) {
377       while (static_cast<uint32_t>(shift_amount) > MAX_SHIFT_AMOUNT) {
378         this->left_shift(MAX_SHIFT_AMOUNT);
379         shift_amount -= MAX_SHIFT_AMOUNT;
380       }
381       this->left_shift(shift_amount);
382     }
383     // Right
384     else {
385       while (static_cast<uint32_t>(shift_amount) < -MAX_SHIFT_AMOUNT) {
386         this->right_shift(MAX_SHIFT_AMOUNT);
387         shift_amount += MAX_SHIFT_AMOUNT;
388       }
389       this->right_shift(-shift_amount);
390     }
391   }
392 
393   // Round the number represented to the closest value of unsigned int type T.
394   // This is done ignoring overflow.
395   template <class T>
396   LIBC_INLINE T
397   round_to_integer_type(RoundDirection round = RoundDirection::Nearest) {
398     T result = 0;
399     uint32_t cur_digit = 0;
400 
401     while (static_cast<int32_t>(cur_digit) < this->decimal_point &&
402            cur_digit < this->num_digits) {
403       result = result * 10 + (this->digits[cur_digit]);
404       ++cur_digit;
405     }
406 
407     // If there are implicit 0s at the end of the number, include those.
408     while (static_cast<int32_t>(cur_digit) < this->decimal_point) {
409       result *= 10;
410       ++cur_digit;
411     }
412     return result + this->should_round_up(this->decimal_point, round);
413   }
414 
415   // Extra functions for testing.
416 
get_digits()417   LIBC_INLINE uint8_t *get_digits() { return this->digits; }
get_num_digits()418   LIBC_INLINE uint32_t get_num_digits() { return this->num_digits; }
get_decimal_point()419   LIBC_INLINE int32_t get_decimal_point() { return this->decimal_point; }
set_truncated(bool trunc)420   LIBC_INLINE void set_truncated(bool trunc) { this->truncated = trunc; }
421 };
422 
423 } // namespace internal
424 } // namespace LIBC_NAMESPACE
425 
426 #endif // LLVM_LIBC_SRC___SUPPORT_HIGH_PRECISION_DECIMAL_H
427