• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2017 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 // This file contains string processing functions related to
16 // numeric values.
17 
18 #include "absl/strings/numbers.h"
19 
20 #include <algorithm>
21 #include <cassert>
22 #include <cfloat>  // for DBL_DIG and FLT_DIG
23 #include <climits>
24 #include <cmath>   // for HUGE_VAL
25 #include <cstddef>
26 #include <cstdint>
27 #include <cstdio>
28 #include <cstdlib>
29 #include <cstring>
30 #include <iterator>
31 #include <limits>
32 #include <system_error>  // NOLINT(build/c++11)
33 #include <type_traits>
34 #include <utility>
35 
36 #include "absl/base/attributes.h"
37 #include "absl/base/config.h"
38 #include "absl/base/internal/endian.h"
39 #include "absl/base/internal/raw_logging.h"
40 #include "absl/base/nullability.h"
41 #include "absl/base/optimization.h"
42 #include "absl/numeric/bits.h"
43 #include "absl/numeric/int128.h"
44 #include "absl/strings/ascii.h"
45 #include "absl/strings/charconv.h"
46 #include "absl/strings/match.h"
47 #include "absl/strings/string_view.h"
48 
49 namespace absl {
50 ABSL_NAMESPACE_BEGIN
51 
SimpleAtof(absl::string_view str,absl::Nonnull<float * > out)52 bool SimpleAtof(absl::string_view str, absl::Nonnull<float*> out) {
53   *out = 0.0;
54   str = StripAsciiWhitespace(str);
55   // std::from_chars doesn't accept an initial +, but SimpleAtof does, so if one
56   // is present, skip it, while avoiding accepting "+-0" as valid.
57   if (!str.empty() && str[0] == '+') {
58     str.remove_prefix(1);
59     if (!str.empty() && str[0] == '-') {
60       return false;
61     }
62   }
63   auto result = absl::from_chars(str.data(), str.data() + str.size(), *out);
64   if (result.ec == std::errc::invalid_argument) {
65     return false;
66   }
67   if (result.ptr != str.data() + str.size()) {
68     // not all non-whitespace characters consumed
69     return false;
70   }
71   // from_chars() with DR 3081's current wording will return max() on
72   // overflow.  SimpleAtof returns infinity instead.
73   if (result.ec == std::errc::result_out_of_range) {
74     if (*out > 1.0) {
75       *out = std::numeric_limits<float>::infinity();
76     } else if (*out < -1.0) {
77       *out = -std::numeric_limits<float>::infinity();
78     }
79   }
80   return true;
81 }
82 
SimpleAtod(absl::string_view str,absl::Nonnull<double * > out)83 bool SimpleAtod(absl::string_view str, absl::Nonnull<double*> out) {
84   *out = 0.0;
85   str = StripAsciiWhitespace(str);
86   // std::from_chars doesn't accept an initial +, but SimpleAtod does, so if one
87   // is present, skip it, while avoiding accepting "+-0" as valid.
88   if (!str.empty() && str[0] == '+') {
89     str.remove_prefix(1);
90     if (!str.empty() && str[0] == '-') {
91       return false;
92     }
93   }
94   auto result = absl::from_chars(str.data(), str.data() + str.size(), *out);
95   if (result.ec == std::errc::invalid_argument) {
96     return false;
97   }
98   if (result.ptr != str.data() + str.size()) {
99     // not all non-whitespace characters consumed
100     return false;
101   }
102   // from_chars() with DR 3081's current wording will return max() on
103   // overflow.  SimpleAtod returns infinity instead.
104   if (result.ec == std::errc::result_out_of_range) {
105     if (*out > 1.0) {
106       *out = std::numeric_limits<double>::infinity();
107     } else if (*out < -1.0) {
108       *out = -std::numeric_limits<double>::infinity();
109     }
110   }
111   return true;
112 }
113 
SimpleAtob(absl::string_view str,absl::Nonnull<bool * > out)114 bool SimpleAtob(absl::string_view str, absl::Nonnull<bool*> out) {
115   ABSL_RAW_CHECK(out != nullptr, "Output pointer must not be nullptr.");
116   if (EqualsIgnoreCase(str, "true") || EqualsIgnoreCase(str, "t") ||
117       EqualsIgnoreCase(str, "yes") || EqualsIgnoreCase(str, "y") ||
118       EqualsIgnoreCase(str, "1")) {
119     *out = true;
120     return true;
121   }
122   if (EqualsIgnoreCase(str, "false") || EqualsIgnoreCase(str, "f") ||
123       EqualsIgnoreCase(str, "no") || EqualsIgnoreCase(str, "n") ||
124       EqualsIgnoreCase(str, "0")) {
125     *out = false;
126     return true;
127   }
128   return false;
129 }
130 
131 // ----------------------------------------------------------------------
132 // FastIntToBuffer() overloads
133 //
134 // Like the Fast*ToBuffer() functions above, these are intended for speed.
135 // Unlike the Fast*ToBuffer() functions, however, these functions write
136 // their output to the beginning of the buffer.  The caller is responsible
137 // for ensuring that the buffer has enough space to hold the output.
138 //
139 // Returns a pointer to the end of the string (i.e. the null character
140 // terminating the string).
141 // ----------------------------------------------------------------------
142 
143 namespace {
144 
145 // Various routines to encode integers to strings.
146 
147 // We split data encodings into a group of 2 digits, 4 digits, 8 digits as
148 // it's easier to combine powers of two into scalar arithmetic.
149 
150 // Previous implementation used a lookup table of 200 bytes for every 2 bytes
151 // and it was memory bound, any L1 cache miss would result in a much slower
152 // result. When benchmarking with a cache eviction rate of several percent,
153 // this implementation proved to be better.
154 
155 // These constants represent '00', '0000' and '00000000' as ascii strings in
156 // integers. We can add these numbers if we encode to bytes from 0 to 9. as
157 // 'i' = '0' + i for 0 <= i <= 9.
158 constexpr uint32_t kTwoZeroBytes = 0x0101 * '0';
159 constexpr uint64_t kFourZeroBytes = 0x01010101 * '0';
160 constexpr uint64_t kEightZeroBytes = 0x0101010101010101ull * '0';
161 
162 template <typename T>
Pow(T base,uint32_t n)163 constexpr T Pow(T base, uint32_t n) {
164   // Exponentiation by squaring
165   return static_cast<T>((n > 1 ? Pow(base * base, n >> 1) : static_cast<T>(1)) *
166                         ((n & 1) ? base : static_cast<T>(1)));
167 }
168 
169 // Given n, calculates C where the following holds for all 0 <= x < Pow(100, n):
170 // x / Pow(10, n) == x * C / Pow(2, n * 10)
171 // In other words, it allows us to divide by a power of 10 via a single
172 // multiplication and bit shifts, assuming the input will be smaller than the
173 // square of that power of 10.
174 template <typename T>
ComputePowerOf100DivisionCoefficient(uint32_t n)175 constexpr T ComputePowerOf100DivisionCoefficient(uint32_t n) {
176   if (n > 4) {
177     // This doesn't work for large powers of 100, due to overflow
178     abort();
179   }
180   T denom = 16 - 1;
181   T num = (denom + 1) - 10;
182   T gcd = 3;  // Greatest common divisor of numerator and denominator
183   denom = Pow(denom / gcd, n);
184   num = Pow(num / gcd, 9 * n);
185   T quotient = num / denom;
186   if (num % denom >= denom / 2) {
187     // Round up, since the remainder is more than half the denominator
188     ++quotient;
189   }
190   return quotient;
191 }
192 
193 // * kDivisionBy10Mul / kDivisionBy10Div is a division by 10 for values from 0
194 // to 99. It's also a division of a structure [k takes 2 bytes][m takes 2
195 // bytes], then * kDivisionBy10Mul / kDivisionBy10Div will be [k / 10][m / 10].
196 // It allows parallel division.
197 constexpr uint64_t kDivisionBy10Mul =
198     ComputePowerOf100DivisionCoefficient<uint64_t>(1);
199 static_assert(kDivisionBy10Mul == 103,
200               "division coefficient for 10 is incorrect");
201 constexpr uint64_t kDivisionBy10Div = 1 << 10;
202 
203 // * kDivisionBy100Mul / kDivisionBy100Div is a division by 100 for values from
204 // 0 to 9999.
205 constexpr uint64_t kDivisionBy100Mul =
206     ComputePowerOf100DivisionCoefficient<uint64_t>(2);
207 static_assert(kDivisionBy100Mul == 10486,
208               "division coefficient for 100 is incorrect");
209 constexpr uint64_t kDivisionBy100Div = 1 << 20;
210 
211 static_assert(ComputePowerOf100DivisionCoefficient<uint64_t>(3) == 1073742,
212               "division coefficient for 1000 is incorrect");
213 
214 // Same as `PrepareEightDigits`, but produces 2 digits for integers < 100.
PrepareTwoDigitsImpl(uint32_t i,bool reversed)215 inline uint32_t PrepareTwoDigitsImpl(uint32_t i, bool reversed) {
216   assert(i < 100);
217   uint32_t div10 = (i * kDivisionBy10Mul) / kDivisionBy10Div;
218   uint32_t mod10 = i - 10u * div10;
219   return (div10 << (reversed ? 8 : 0)) + (mod10 << (reversed ? 0 : 8));
220 }
PrepareTwoDigits(uint32_t i)221 inline uint32_t PrepareTwoDigits(uint32_t i) {
222   return PrepareTwoDigitsImpl(i, false);
223 }
224 
225 // Same as `PrepareEightDigits`, but produces 4 digits for integers < 10000.
PrepareFourDigitsImpl(uint32_t n,bool reversed)226 inline uint32_t PrepareFourDigitsImpl(uint32_t n, bool reversed) {
227   // We split lower 2 digits and upper 2 digits of n into 2 byte consecutive
228   // blocks. 123 ->  [\0\1][\0\23]. We divide by 10 both blocks
229   // (it's 1 division + zeroing upper bits), and compute modulo 10 as well "in
230   // parallel". Then we combine both results to have both ASCII digits,
231   // strip trailing zeros, add ASCII '0000' and return.
232   uint32_t div100 = (n * kDivisionBy100Mul) / kDivisionBy100Div;
233   uint32_t mod100 = n - 100ull * div100;
234   uint32_t hundreds =
235       (mod100 << (reversed ? 0 : 16)) + (div100 << (reversed ? 16 : 0));
236   uint32_t tens = (hundreds * kDivisionBy10Mul) / kDivisionBy10Div;
237   tens &= (0xFull << 16) | 0xFull;
238   tens = (tens << (reversed ? 8 : 0)) +
239          static_cast<uint32_t>((hundreds - 10ull * tens) << (reversed ? 0 : 8));
240   return tens;
241 }
PrepareFourDigits(uint32_t n)242 inline uint32_t PrepareFourDigits(uint32_t n) {
243   return PrepareFourDigitsImpl(n, false);
244 }
PrepareFourDigitsReversed(uint32_t n)245 inline uint32_t PrepareFourDigitsReversed(uint32_t n) {
246   return PrepareFourDigitsImpl(n, true);
247 }
248 
249 // Helper function to produce an ASCII representation of `i`.
250 //
251 // Function returns an 8-byte integer which when summed with `kEightZeroBytes`,
252 // can be treated as a printable buffer with ascii representation of `i`,
253 // possibly with leading zeros.
254 //
255 // Example:
256 //
257 //  uint64_t buffer = PrepareEightDigits(102030) + kEightZeroBytes;
258 //  char* ascii = reinterpret_cast<char*>(&buffer);
259 //  // Note two leading zeros:
260 //  EXPECT_EQ(absl::string_view(ascii, 8), "00102030");
261 //
262 // If `Reversed` is set to true, the result becomes reversed to "03020100".
263 //
264 // Pre-condition: `i` must be less than 100000000.
PrepareEightDigitsImpl(uint32_t i,bool reversed)265 inline uint64_t PrepareEightDigitsImpl(uint32_t i, bool reversed) {
266   ABSL_ASSUME(i < 10000'0000);
267   // Prepare 2 blocks of 4 digits "in parallel".
268   uint32_t hi = i / 10000;
269   uint32_t lo = i % 10000;
270   uint64_t merged = (uint64_t{hi} << (reversed ? 32 : 0)) |
271                     (uint64_t{lo} << (reversed ? 0 : 32));
272   uint64_t div100 = ((merged * kDivisionBy100Mul) / kDivisionBy100Div) &
273                     ((0x7Full << 32) | 0x7Full);
274   uint64_t mod100 = merged - 100ull * div100;
275   uint64_t hundreds =
276       (mod100 << (reversed ? 0 : 16)) + (div100 << (reversed ? 16 : 0));
277   uint64_t tens = (hundreds * kDivisionBy10Mul) / kDivisionBy10Div;
278   tens &= (0xFull << 48) | (0xFull << 32) | (0xFull << 16) | 0xFull;
279   tens = (tens << (reversed ? 8 : 0)) +
280          ((hundreds - 10ull * tens) << (reversed ? 0 : 8));
281   return tens;
282 }
PrepareEightDigits(uint32_t i)283 inline uint64_t PrepareEightDigits(uint32_t i) {
284   return PrepareEightDigitsImpl(i, false);
285 }
PrepareEightDigitsReversed(uint32_t i)286 inline uint64_t PrepareEightDigitsReversed(uint32_t i) {
287   return PrepareEightDigitsImpl(i, true);
288 }
289 
290 template <typename T, typename BackwardIt>
291 class FastUIntToStringConverter {
292   static_assert(
293       std::is_same<T, decltype(+std::declval<T>())>::value,
294       "to avoid code bloat, only instantiate this for int and larger types");
295   static_assert(std::is_unsigned<T>::value,
296                 "this class is only for unsigned types");
297 
298  public:
299   // Outputs the given number backward (like with std::copy_backward),
300   // starting from the end of the string.
301   // The number of digits in the number must have been already measured and
302   // passed *exactly*, otherwise the behavior is undefined.
303   // (This is an optimization, as calculating the number of digits again would
304   // slow down the hot path.)
305   // Returns an iterator to the start of the suffix that was appended.
FastIntToBufferBackward(T v,BackwardIt end)306   static BackwardIt FastIntToBufferBackward(T v, BackwardIt end) {
307     // THIS IS A HOT FUNCTION with a very deliberate structure to exploit branch
308     // prediction and shorten the critical path for smaller numbers.
309     // Do not move around the if/else blocks or attempt to simplify it
310     // without benchmarking any changes.
311 
312     if (v < 10) {
313       goto AT_LEAST_1 /* NOTE: mandatory for the 0 case */;
314     }
315     if (v < 1000) {
316       goto AT_LEAST_10;
317     }
318     if (v < 10000000) {
319       goto AT_LEAST_1000;
320     }
321 
322     if (v >= 100000000 / 10) {
323       if (v >= 10000000000000000 / 10) {
324         DoFastIntToBufferBackward<8>(v, end);
325       }
326       DoFastIntToBufferBackward<8>(v, end);
327     }
328 
329     if (v >= 10000 / 10) {
330     AT_LEAST_1000:
331       DoFastIntToBufferBackward<4>(v, end);
332     }
333 
334     if (v >= 100 / 10) {
335     AT_LEAST_10:
336       DoFastIntToBufferBackward<2>(v, end);
337     }
338 
339     if (v >= 10 / 10) {
340     AT_LEAST_1:
341       end = DoFastIntToBufferBackward(v, end, std::integral_constant<int, 1>());
342     }
343     return end;
344   }
345 
346  private:
347   // Only assume pointers are contiguous for now. String and vector iterators
348   // could be special-cased as well, but there's no need for them here.
349   // With C++20 we can probably switch to std::contiguous_iterator_tag.
350   static constexpr bool kIsContiguousIterator =
351       std::is_pointer<BackwardIt>::value;
352 
353   template <int Exponent>
DoFastIntToBufferBackward(T & v,BackwardIt & end)354   static void DoFastIntToBufferBackward(T& v, BackwardIt& end) {
355     constexpr T kModulus = Pow<T>(10, Exponent);
356     T remainder = static_cast<T>(v % kModulus);
357     v = static_cast<T>(v / kModulus);
358     end = DoFastIntToBufferBackward(remainder, end,
359                                     std::integral_constant<int, Exponent>());
360   }
361 
DoFastIntToBufferBackward(const T &,BackwardIt end,std::integral_constant<int,0>)362   static BackwardIt DoFastIntToBufferBackward(const T&, BackwardIt end,
363                                               std::integral_constant<int, 0>) {
364     return end;
365   }
366 
DoFastIntToBufferBackward(T v,BackwardIt end,std::integral_constant<int,1>)367   static BackwardIt DoFastIntToBufferBackward(T v, BackwardIt end,
368                                               std::integral_constant<int, 1>) {
369     *--end = static_cast<char>('0' + v);
370     return DoFastIntToBufferBackward(v, end, std::integral_constant<int, 0>());
371   }
372 
DoFastIntToBufferBackward(T v,BackwardIt end,std::integral_constant<int,4>)373   static BackwardIt DoFastIntToBufferBackward(T v, BackwardIt end,
374                                               std::integral_constant<int, 4>) {
375     if (kIsContiguousIterator) {
376       const uint32_t digits =
377           PrepareFourDigits(static_cast<uint32_t>(v)) + kFourZeroBytes;
378       end -= sizeof(digits);
379       little_endian::Store32(&*end, digits);
380     } else {
381       uint32_t digits =
382           PrepareFourDigitsReversed(static_cast<uint32_t>(v)) + kFourZeroBytes;
383       for (size_t i = 0; i < sizeof(digits); ++i) {
384         *--end = static_cast<char>(digits);
385         digits >>= CHAR_BIT;
386       }
387     }
388     return end;
389   }
390 
DoFastIntToBufferBackward(T v,BackwardIt end,std::integral_constant<int,8>)391   static BackwardIt DoFastIntToBufferBackward(T v, BackwardIt end,
392                                               std::integral_constant<int, 8>) {
393     if (kIsContiguousIterator) {
394       const uint64_t digits =
395           PrepareEightDigits(static_cast<uint32_t>(v)) + kEightZeroBytes;
396       end -= sizeof(digits);
397       little_endian::Store64(&*end, digits);
398     } else {
399       uint64_t digits = PrepareEightDigitsReversed(static_cast<uint32_t>(v)) +
400                         kEightZeroBytes;
401       for (size_t i = 0; i < sizeof(digits); ++i) {
402         *--end = static_cast<char>(digits);
403         digits >>= CHAR_BIT;
404       }
405     }
406     return end;
407   }
408 
409   template <int Digits>
DoFastIntToBufferBackward(T v,BackwardIt end,std::integral_constant<int,Digits>)410   static BackwardIt DoFastIntToBufferBackward(
411       T v, BackwardIt end, std::integral_constant<int, Digits>) {
412     constexpr int kLogModulus = Digits - Digits / 2;
413     constexpr T kModulus = Pow(static_cast<T>(10), kLogModulus);
414     bool is_safe_to_use_division_trick = Digits <= 8;
415     T quotient, remainder;
416     if (is_safe_to_use_division_trick) {
417       constexpr uint64_t kCoefficient =
418           ComputePowerOf100DivisionCoefficient<uint64_t>(kLogModulus);
419       quotient = (v * kCoefficient) >> (10 * kLogModulus);
420       remainder = v - quotient * kModulus;
421     } else {
422       quotient = v / kModulus;
423       remainder = v % kModulus;
424     }
425     end = DoFastIntToBufferBackward(remainder, end,
426                                     std::integral_constant<int, kLogModulus>());
427     return DoFastIntToBufferBackward(
428         quotient, end, std::integral_constant<int, Digits - kLogModulus>());
429   }
430 };
431 
432 // Returns an iterator to the start of the suffix that was appended
433 template <typename T, typename BackwardIt>
434 std::enable_if_t<std::is_unsigned<T>::value, BackwardIt>
DoFastIntToBufferBackward(T v,BackwardIt end,uint32_t digits)435 DoFastIntToBufferBackward(T v, BackwardIt end, uint32_t digits) {
436   using PromotedT = std::decay_t<decltype(+v)>;
437   using Converter = FastUIntToStringConverter<PromotedT, BackwardIt>;
438   (void)digits;
439   return Converter().FastIntToBufferBackward(v, end);
440 }
441 
442 template <typename T, typename BackwardIt>
443 std::enable_if_t<std::is_signed<T>::value, BackwardIt>
DoFastIntToBufferBackward(T v,BackwardIt end,uint32_t digits)444 DoFastIntToBufferBackward(T v, BackwardIt end, uint32_t digits) {
445   if (absl::numbers_internal::IsNegative(v)) {
446     // Store the minus sign *before* we produce the number itself, not after.
447     // This gets us a tail call.
448     end[-static_cast<ptrdiff_t>(digits) - 1] = '-';
449   }
450   return DoFastIntToBufferBackward(
451       absl::numbers_internal::UnsignedAbsoluteValue(v), end, digits);
452 }
453 
454 template <class T>
455 std::enable_if_t<std::is_integral<T>::value, int>
GetNumDigitsOrNegativeIfNegativeImpl(T v)456 GetNumDigitsOrNegativeIfNegativeImpl(T v) {
457   const auto /* either bool or std::false_type */ is_negative =
458       absl::numbers_internal::IsNegative(v);
459   const int digits = static_cast<int>(absl::numbers_internal::Base10Digits(
460       absl::numbers_internal::UnsignedAbsoluteValue(v)));
461   return is_negative ? ~digits : digits;
462 }
463 
464 }  // namespace
465 
PutTwoDigits(uint32_t i,absl::Nonnull<char * > buf)466 void numbers_internal::PutTwoDigits(uint32_t i, absl::Nonnull<char*> buf) {
467   little_endian::Store16(
468       buf, static_cast<uint16_t>(PrepareTwoDigits(i) + kTwoZeroBytes));
469 }
470 
FastIntToBuffer(uint32_t i,absl::Nonnull<char * > buffer)471 absl::Nonnull<char*> numbers_internal::FastIntToBuffer(
472     uint32_t i, absl::Nonnull<char*> buffer) {
473   const uint32_t digits = absl::numbers_internal::Base10Digits(i);
474   buffer += digits;
475   *buffer = '\0';  // We're going backward, so store this first
476   FastIntToBufferBackward(i, buffer, digits);
477   return buffer;
478 }
479 
FastIntToBuffer(int32_t i,absl::Nonnull<char * > buffer)480 absl::Nonnull<char*> numbers_internal::FastIntToBuffer(
481     int32_t i, absl::Nonnull<char*> buffer) {
482   buffer += static_cast<int>(i < 0);
483   uint32_t digits = absl::numbers_internal::Base10Digits(
484       absl::numbers_internal::UnsignedAbsoluteValue(i));
485   buffer += digits;
486   *buffer = '\0';  // We're going backward, so store this first
487   FastIntToBufferBackward(i, buffer, digits);
488   return buffer;
489 }
490 
FastIntToBuffer(uint64_t i,absl::Nonnull<char * > buffer)491 absl::Nonnull<char*> numbers_internal::FastIntToBuffer(
492     uint64_t i, absl::Nonnull<char*> buffer) {
493   uint32_t digits = absl::numbers_internal::Base10Digits(i);
494   buffer += digits;
495   *buffer = '\0';  // We're going backward, so store this first
496   FastIntToBufferBackward(i, buffer, digits);
497   return buffer;
498 }
499 
FastIntToBuffer(int64_t i,absl::Nonnull<char * > buffer)500 absl::Nonnull<char*> numbers_internal::FastIntToBuffer(
501     int64_t i, absl::Nonnull<char*> buffer) {
502   buffer += static_cast<int>(i < 0);
503   uint32_t digits = absl::numbers_internal::Base10Digits(
504       absl::numbers_internal::UnsignedAbsoluteValue(i));
505   buffer += digits;
506   *buffer = '\0';  // We're going backward, so store this first
507   FastIntToBufferBackward(i, buffer, digits);
508   return buffer;
509 }
510 
FastIntToBufferBackward(uint32_t i,absl::Nonnull<char * > buffer_end,uint32_t exact_digit_count)511 absl::Nonnull<char*> numbers_internal::FastIntToBufferBackward(
512     uint32_t i, absl::Nonnull<char*> buffer_end, uint32_t exact_digit_count) {
513   return DoFastIntToBufferBackward(i, buffer_end, exact_digit_count);
514 }
515 
FastIntToBufferBackward(int32_t i,absl::Nonnull<char * > buffer_end,uint32_t exact_digit_count)516 absl::Nonnull<char*> numbers_internal::FastIntToBufferBackward(
517     int32_t i, absl::Nonnull<char*> buffer_end, uint32_t exact_digit_count) {
518   return DoFastIntToBufferBackward(i, buffer_end, exact_digit_count);
519 }
520 
FastIntToBufferBackward(uint64_t i,absl::Nonnull<char * > buffer_end,uint32_t exact_digit_count)521 absl::Nonnull<char*> numbers_internal::FastIntToBufferBackward(
522     uint64_t i, absl::Nonnull<char*> buffer_end, uint32_t exact_digit_count) {
523   return DoFastIntToBufferBackward(i, buffer_end, exact_digit_count);
524 }
525 
FastIntToBufferBackward(int64_t i,absl::Nonnull<char * > buffer_end,uint32_t exact_digit_count)526 absl::Nonnull<char*> numbers_internal::FastIntToBufferBackward(
527     int64_t i, absl::Nonnull<char*> buffer_end, uint32_t exact_digit_count) {
528   return DoFastIntToBufferBackward(i, buffer_end, exact_digit_count);
529 }
530 
GetNumDigitsOrNegativeIfNegative(signed char v)531 int numbers_internal::GetNumDigitsOrNegativeIfNegative(signed char v) {
532   return GetNumDigitsOrNegativeIfNegativeImpl(v);
533 }
GetNumDigitsOrNegativeIfNegative(unsigned char v)534 int numbers_internal::GetNumDigitsOrNegativeIfNegative(unsigned char v) {
535   return GetNumDigitsOrNegativeIfNegativeImpl(v);
536 }
GetNumDigitsOrNegativeIfNegative(short v)537 int numbers_internal::GetNumDigitsOrNegativeIfNegative(short v) {  // NOLINT
538   return GetNumDigitsOrNegativeIfNegativeImpl(v);
539 }
GetNumDigitsOrNegativeIfNegative(unsigned short v)540 int numbers_internal::GetNumDigitsOrNegativeIfNegative(
541     unsigned short v) {  // NOLINT
542   return GetNumDigitsOrNegativeIfNegativeImpl(v);
543 }
GetNumDigitsOrNegativeIfNegative(int v)544 int numbers_internal::GetNumDigitsOrNegativeIfNegative(int v) {
545   return GetNumDigitsOrNegativeIfNegativeImpl(v);
546 }
GetNumDigitsOrNegativeIfNegative(unsigned int v)547 int numbers_internal::GetNumDigitsOrNegativeIfNegative(unsigned int v) {
548   return GetNumDigitsOrNegativeIfNegativeImpl(v);
549 }
GetNumDigitsOrNegativeIfNegative(long v)550 int numbers_internal::GetNumDigitsOrNegativeIfNegative(long v) {  // NOLINT
551   return GetNumDigitsOrNegativeIfNegativeImpl(v);
552 }
GetNumDigitsOrNegativeIfNegative(unsigned long v)553 int numbers_internal::GetNumDigitsOrNegativeIfNegative(
554     unsigned long v) {  // NOLINT
555   return GetNumDigitsOrNegativeIfNegativeImpl(v);
556 }
GetNumDigitsOrNegativeIfNegative(long long v)557 int numbers_internal::GetNumDigitsOrNegativeIfNegative(long long v) {  // NOLINT
558   return GetNumDigitsOrNegativeIfNegativeImpl(v);
559 }
GetNumDigitsOrNegativeIfNegative(unsigned long long v)560 int numbers_internal::GetNumDigitsOrNegativeIfNegative(
561     unsigned long long v) {  // NOLINT
562   return GetNumDigitsOrNegativeIfNegativeImpl(v);
563 }
564 
565 // Given a 128-bit number expressed as a pair of uint64_t, high half first,
566 // return that number multiplied by the given 32-bit value.  If the result is
567 // too large to fit in a 128-bit number, divide it by 2 until it fits.
Mul32(std::pair<uint64_t,uint64_t> num,uint32_t mul)568 static std::pair<uint64_t, uint64_t> Mul32(std::pair<uint64_t, uint64_t> num,
569                                            uint32_t mul) {
570   uint64_t bits0_31 = num.second & 0xFFFFFFFF;
571   uint64_t bits32_63 = num.second >> 32;
572   uint64_t bits64_95 = num.first & 0xFFFFFFFF;
573   uint64_t bits96_127 = num.first >> 32;
574 
575   // The picture so far: each of these 64-bit values has only the lower 32 bits
576   // filled in.
577   // bits96_127:          [ 00000000 xxxxxxxx ]
578   // bits64_95:                    [ 00000000 xxxxxxxx ]
579   // bits32_63:                             [ 00000000 xxxxxxxx ]
580   // bits0_31:                                       [ 00000000 xxxxxxxx ]
581 
582   bits0_31 *= mul;
583   bits32_63 *= mul;
584   bits64_95 *= mul;
585   bits96_127 *= mul;
586 
587   // Now the top halves may also have value, though all 64 of their bits will
588   // never be set at the same time, since they are a result of a 32x32 bit
589   // multiply.  This makes the carry calculation slightly easier.
590   // bits96_127:          [ mmmmmmmm | mmmmmmmm ]
591   // bits64_95:                    [ | mmmmmmmm mmmmmmmm | ]
592   // bits32_63:                      |        [ mmmmmmmm | mmmmmmmm ]
593   // bits0_31:                       |                 [ | mmmmmmmm mmmmmmmm ]
594   // eventually:        [ bits128_up | ...bits64_127.... | ..bits0_63... ]
595 
596   uint64_t bits0_63 = bits0_31 + (bits32_63 << 32);
597   uint64_t bits64_127 = bits64_95 + (bits96_127 << 32) + (bits32_63 >> 32) +
598                         (bits0_63 < bits0_31);
599   uint64_t bits128_up = (bits96_127 >> 32) + (bits64_127 < bits64_95);
600   if (bits128_up == 0) return {bits64_127, bits0_63};
601 
602   auto shift = static_cast<unsigned>(bit_width(bits128_up));
603   uint64_t lo = (bits0_63 >> shift) + (bits64_127 << (64 - shift));
604   uint64_t hi = (bits64_127 >> shift) + (bits128_up << (64 - shift));
605   return {hi, lo};
606 }
607 
608 // Compute num * 5 ^ expfive, and return the first 128 bits of the result,
609 // where the first bit is always a one.  So PowFive(1, 0) starts 0b100000,
610 // PowFive(1, 1) starts 0b101000, PowFive(1, 2) starts 0b110010, etc.
PowFive(uint64_t num,int expfive)611 static std::pair<uint64_t, uint64_t> PowFive(uint64_t num, int expfive) {
612   std::pair<uint64_t, uint64_t> result = {num, 0};
613   while (expfive >= 13) {
614     // 5^13 is the highest power of five that will fit in a 32-bit integer.
615     result = Mul32(result, 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5);
616     expfive -= 13;
617   }
618   constexpr uint32_t powers_of_five[13] = {
619       1,
620       5,
621       5 * 5,
622       5 * 5 * 5,
623       5 * 5 * 5 * 5,
624       5 * 5 * 5 * 5 * 5,
625       5 * 5 * 5 * 5 * 5 * 5,
626       5 * 5 * 5 * 5 * 5 * 5 * 5,
627       5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
628       5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
629       5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
630       5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
631       5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5};
632   result = Mul32(result, powers_of_five[expfive & 15]);
633   int shift = countl_zero(result.first);
634   if (shift != 0) {
635     result.first = (result.first << shift) + (result.second >> (64 - shift));
636     result.second = (result.second << shift);
637   }
638   return result;
639 }
640 
641 struct ExpDigits {
642   int32_t exponent;
643   char digits[6];
644 };
645 
646 // SplitToSix converts value, a positive double-precision floating-point number,
647 // into a base-10 exponent and 6 ASCII digits, where the first digit is never
648 // zero.  For example, SplitToSix(1) returns an exponent of zero and a digits
649 // array of {'1', '0', '0', '0', '0', '0'}.  If value is exactly halfway between
650 // two possible representations, e.g. value = 100000.5, then "round to even" is
651 // performed.
SplitToSix(const double value)652 static ExpDigits SplitToSix(const double value) {
653   ExpDigits exp_dig;
654   int exp = 5;
655   double d = value;
656   // First step: calculate a close approximation of the output, where the
657   // value d will be between 100,000 and 999,999, representing the digits
658   // in the output ASCII array, and exp is the base-10 exponent.  It would be
659   // faster to use a table here, and to look up the base-2 exponent of value,
660   // however value is an IEEE-754 64-bit number, so the table would have 2,000
661   // entries, which is not cache-friendly.
662   if (d >= 999999.5) {
663     if (d >= 1e+261) exp += 256, d *= 1e-256;
664     if (d >= 1e+133) exp += 128, d *= 1e-128;
665     if (d >= 1e+69) exp += 64, d *= 1e-64;
666     if (d >= 1e+37) exp += 32, d *= 1e-32;
667     if (d >= 1e+21) exp += 16, d *= 1e-16;
668     if (d >= 1e+13) exp += 8, d *= 1e-8;
669     if (d >= 1e+9) exp += 4, d *= 1e-4;
670     if (d >= 1e+7) exp += 2, d *= 1e-2;
671     if (d >= 1e+6) exp += 1, d *= 1e-1;
672   } else {
673     if (d < 1e-250) exp -= 256, d *= 1e256;
674     if (d < 1e-122) exp -= 128, d *= 1e128;
675     if (d < 1e-58) exp -= 64, d *= 1e64;
676     if (d < 1e-26) exp -= 32, d *= 1e32;
677     if (d < 1e-10) exp -= 16, d *= 1e16;
678     if (d < 1e-2) exp -= 8, d *= 1e8;
679     if (d < 1e+2) exp -= 4, d *= 1e4;
680     if (d < 1e+4) exp -= 2, d *= 1e2;
681     if (d < 1e+5) exp -= 1, d *= 1e1;
682   }
683   // At this point, d is in the range [99999.5..999999.5) and exp is in the
684   // range [-324..308]. Since we need to round d up, we want to add a half
685   // and truncate.
686   // However, the technique above may have lost some precision, due to its
687   // repeated multiplication by constants that each may be off by half a bit
688   // of precision.  This only matters if we're close to the edge though.
689   // Since we'd like to know if the fractional part of d is close to a half,
690   // we multiply it by 65536 and see if the fractional part is close to 32768.
691   // (The number doesn't have to be a power of two,but powers of two are faster)
692   uint64_t d64k = d * 65536;
693   uint32_t dddddd;  // A 6-digit decimal integer.
694   if ((d64k % 65536) == 32767 || (d64k % 65536) == 32768) {
695     // OK, it's fairly likely that precision was lost above, which is
696     // not a surprise given only 52 mantissa bits are available.  Therefore
697     // redo the calculation using 128-bit numbers.  (64 bits are not enough).
698 
699     // Start out with digits rounded down; maybe add one below.
700     dddddd = static_cast<uint32_t>(d64k / 65536);
701 
702     // mantissa is a 64-bit integer representing M.mmm... * 2^63.  The actual
703     // value we're representing, of course, is M.mmm... * 2^exp2.
704     int exp2;
705     double m = std::frexp(value, &exp2);
706     uint64_t mantissa = m * (32768.0 * 65536.0 * 65536.0 * 65536.0);
707     // std::frexp returns an m value in the range [0.5, 1.0), however we
708     // can't multiply it by 2^64 and convert to an integer because some FPUs
709     // throw an exception when converting an number higher than 2^63 into an
710     // integer - even an unsigned 64-bit integer!  Fortunately it doesn't matter
711     // since m only has 52 significant bits anyway.
712     mantissa <<= 1;
713     exp2 -= 64;  // not needed, but nice for debugging
714 
715     // OK, we are here to compare:
716     //     (dddddd + 0.5) * 10^(exp-5)  vs.  mantissa * 2^exp2
717     // so we can round up dddddd if appropriate.  Those values span the full
718     // range of 600 orders of magnitude of IEE 64-bit floating-point.
719     // Fortunately, we already know they are very close, so we don't need to
720     // track the base-2 exponent of both sides.  This greatly simplifies the
721     // the math since the 2^exp2 calculation is unnecessary and the power-of-10
722     // calculation can become a power-of-5 instead.
723 
724     std::pair<uint64_t, uint64_t> edge, val;
725     if (exp >= 6) {
726       // Compare (dddddd + 0.5) * 5 ^ (exp - 5) to mantissa
727       // Since we're tossing powers of two, 2 * dddddd + 1 is the
728       // same as dddddd + 0.5
729       edge = PowFive(2 * dddddd + 1, exp - 5);
730 
731       val.first = mantissa;
732       val.second = 0;
733     } else {
734       // We can't compare (dddddd + 0.5) * 5 ^ (exp - 5) to mantissa as we did
735       // above because (exp - 5) is negative.  So we compare (dddddd + 0.5) to
736       // mantissa * 5 ^ (5 - exp)
737       edge = PowFive(2 * dddddd + 1, 0);
738 
739       val = PowFive(mantissa, 5 - exp);
740     }
741     // printf("exp=%d %016lx %016lx vs %016lx %016lx\n", exp, val.first,
742     //        val.second, edge.first, edge.second);
743     if (val > edge) {
744       dddddd++;
745     } else if (val == edge) {
746       dddddd += (dddddd & 1);
747     }
748   } else {
749     // Here, we are not close to the edge.
750     dddddd = static_cast<uint32_t>((d64k + 32768) / 65536);
751   }
752   if (dddddd == 1000000) {
753     dddddd = 100000;
754     exp += 1;
755   }
756   exp_dig.exponent = exp;
757 
758   uint32_t two_digits = dddddd / 10000;
759   dddddd -= two_digits * 10000;
760   numbers_internal::PutTwoDigits(two_digits, &exp_dig.digits[0]);
761 
762   two_digits = dddddd / 100;
763   dddddd -= two_digits * 100;
764   numbers_internal::PutTwoDigits(two_digits, &exp_dig.digits[2]);
765 
766   numbers_internal::PutTwoDigits(dddddd, &exp_dig.digits[4]);
767   return exp_dig;
768 }
769 
770 // Helper function for fast formatting of floating-point.
771 // The result is the same as "%g", a.k.a. "%.6g".
SixDigitsToBuffer(double d,absl::Nonnull<char * > const buffer)772 size_t numbers_internal::SixDigitsToBuffer(double d,
773                                            absl::Nonnull<char*> const buffer) {
774   static_assert(std::numeric_limits<float>::is_iec559,
775                 "IEEE-754/IEC-559 support only");
776 
777   char* out = buffer;  // we write data to out, incrementing as we go, but
778                        // FloatToBuffer always returns the address of the buffer
779                        // passed in.
780 
781   if (std::isnan(d)) {
782     strcpy(out, "nan");  // NOLINT(runtime/printf)
783     return 3;
784   }
785   if (d == 0) {  // +0 and -0 are handled here
786     if (std::signbit(d)) *out++ = '-';
787     *out++ = '0';
788     *out = 0;
789     return static_cast<size_t>(out - buffer);
790   }
791   if (d < 0) {
792     *out++ = '-';
793     d = -d;
794   }
795   if (d > std::numeric_limits<double>::max()) {
796     strcpy(out, "inf");  // NOLINT(runtime/printf)
797     return static_cast<size_t>(out + 3 - buffer);
798   }
799 
800   auto exp_dig = SplitToSix(d);
801   int exp = exp_dig.exponent;
802   const char* digits = exp_dig.digits;
803   out[0] = '0';
804   out[1] = '.';
805   switch (exp) {
806     case 5:
807       memcpy(out, &digits[0], 6), out += 6;
808       *out = 0;
809       return static_cast<size_t>(out - buffer);
810     case 4:
811       memcpy(out, &digits[0], 5), out += 5;
812       if (digits[5] != '0') {
813         *out++ = '.';
814         *out++ = digits[5];
815       }
816       *out = 0;
817       return static_cast<size_t>(out - buffer);
818     case 3:
819       memcpy(out, &digits[0], 4), out += 4;
820       if ((digits[5] | digits[4]) != '0') {
821         *out++ = '.';
822         *out++ = digits[4];
823         if (digits[5] != '0') *out++ = digits[5];
824       }
825       *out = 0;
826       return static_cast<size_t>(out - buffer);
827     case 2:
828       memcpy(out, &digits[0], 3), out += 3;
829       *out++ = '.';
830       memcpy(out, &digits[3], 3);
831       out += 3;
832       while (out[-1] == '0') --out;
833       if (out[-1] == '.') --out;
834       *out = 0;
835       return static_cast<size_t>(out - buffer);
836     case 1:
837       memcpy(out, &digits[0], 2), out += 2;
838       *out++ = '.';
839       memcpy(out, &digits[2], 4);
840       out += 4;
841       while (out[-1] == '0') --out;
842       if (out[-1] == '.') --out;
843       *out = 0;
844       return static_cast<size_t>(out - buffer);
845     case 0:
846       memcpy(out, &digits[0], 1), out += 1;
847       *out++ = '.';
848       memcpy(out, &digits[1], 5);
849       out += 5;
850       while (out[-1] == '0') --out;
851       if (out[-1] == '.') --out;
852       *out = 0;
853       return static_cast<size_t>(out - buffer);
854     case -4:
855       out[2] = '0';
856       ++out;
857       ABSL_FALLTHROUGH_INTENDED;
858     case -3:
859       out[2] = '0';
860       ++out;
861       ABSL_FALLTHROUGH_INTENDED;
862     case -2:
863       out[2] = '0';
864       ++out;
865       ABSL_FALLTHROUGH_INTENDED;
866     case -1:
867       out += 2;
868       memcpy(out, &digits[0], 6);
869       out += 6;
870       while (out[-1] == '0') --out;
871       *out = 0;
872       return static_cast<size_t>(out - buffer);
873   }
874   assert(exp < -4 || exp >= 6);
875   out[0] = digits[0];
876   assert(out[1] == '.');
877   out += 2;
878   memcpy(out, &digits[1], 5), out += 5;
879   while (out[-1] == '0') --out;
880   if (out[-1] == '.') --out;
881   *out++ = 'e';
882   if (exp > 0) {
883     *out++ = '+';
884   } else {
885     *out++ = '-';
886     exp = -exp;
887   }
888   if (exp > 99) {
889     int dig1 = exp / 100;
890     exp -= dig1 * 100;
891     *out++ = '0' + static_cast<char>(dig1);
892   }
893   PutTwoDigits(static_cast<uint32_t>(exp), out);
894   out += 2;
895   *out = 0;
896   return static_cast<size_t>(out - buffer);
897 }
898 
899 namespace {
900 // Represents integer values of digits.
901 // Uses 36 to indicate an invalid character since we support
902 // bases up to 36.
903 static const int8_t kAsciiToInt[256] = {
904     36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,  // 16 36s.
905     36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
906     36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 0,  1,  2,  3,  4,  5,
907     6,  7,  8,  9,  36, 36, 36, 36, 36, 36, 36, 10, 11, 12, 13, 14, 15, 16, 17,
908     18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
909     36, 36, 36, 36, 36, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
910     24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 36, 36, 36, 36, 36, 36,
911     36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
912     36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
913     36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
914     36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
915     36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
916     36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
917     36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36};
918 
919 // Parse the sign and optional hex or oct prefix in text.
safe_parse_sign_and_base(absl::Nonnull<absl::string_view * > text,absl::Nonnull<int * > base_ptr,absl::Nonnull<bool * > negative_ptr)920 inline bool safe_parse_sign_and_base(
921     absl::Nonnull<absl::string_view*> text /*inout*/,
922     absl::Nonnull<int*> base_ptr /*inout*/,
923     absl::Nonnull<bool*> negative_ptr /*output*/) {
924   if (text->data() == nullptr) {
925     return false;
926   }
927 
928   const char* start = text->data();
929   const char* end = start + text->size();
930   int base = *base_ptr;
931 
932   // Consume whitespace.
933   while (start < end &&
934          absl::ascii_isspace(static_cast<unsigned char>(start[0]))) {
935     ++start;
936   }
937   while (start < end &&
938          absl::ascii_isspace(static_cast<unsigned char>(end[-1]))) {
939     --end;
940   }
941   if (start >= end) {
942     return false;
943   }
944 
945   // Consume sign.
946   *negative_ptr = (start[0] == '-');
947   if (*negative_ptr || start[0] == '+') {
948     ++start;
949     if (start >= end) {
950       return false;
951     }
952   }
953 
954   // Consume base-dependent prefix.
955   //  base 0: "0x" -> base 16, "0" -> base 8, default -> base 10
956   //  base 16: "0x" -> base 16
957   // Also validate the base.
958   if (base == 0) {
959     if (end - start >= 2 && start[0] == '0' &&
960         (start[1] == 'x' || start[1] == 'X')) {
961       base = 16;
962       start += 2;
963       if (start >= end) {
964         // "0x" with no digits after is invalid.
965         return false;
966       }
967     } else if (end - start >= 1 && start[0] == '0') {
968       base = 8;
969       start += 1;
970     } else {
971       base = 10;
972     }
973   } else if (base == 16) {
974     if (end - start >= 2 && start[0] == '0' &&
975         (start[1] == 'x' || start[1] == 'X')) {
976       start += 2;
977       if (start >= end) {
978         // "0x" with no digits after is invalid.
979         return false;
980       }
981     }
982   } else if (base >= 2 && base <= 36) {
983     // okay
984   } else {
985     return false;
986   }
987   *text = absl::string_view(start, static_cast<size_t>(end - start));
988   *base_ptr = base;
989   return true;
990 }
991 
992 // Consume digits.
993 //
994 // The classic loop:
995 //
996 //   for each digit
997 //     value = value * base + digit
998 //   value *= sign
999 //
1000 // The classic loop needs overflow checking.  It also fails on the most
1001 // negative integer, -2147483648 in 32-bit two's complement representation.
1002 //
1003 // My improved loop:
1004 //
1005 //  if (!negative)
1006 //    for each digit
1007 //      value = value * base
1008 //      value = value + digit
1009 //  else
1010 //    for each digit
1011 //      value = value * base
1012 //      value = value - digit
1013 //
1014 // Overflow checking becomes simple.
1015 
1016 // Lookup tables per IntType:
1017 // vmax/base and vmin/base are precomputed because division costs at least 8ns.
1018 // TODO(junyer): Doing this per base instead (i.e. an array of structs, not a
1019 // struct of arrays) would probably be better in terms of d-cache for the most
1020 // commonly used bases.
1021 template <typename IntType>
1022 struct LookupTables {
1023   ABSL_CONST_INIT static const IntType kVmaxOverBase[];
1024   ABSL_CONST_INIT static const IntType kVminOverBase[];
1025 };
1026 
1027 // An array initializer macro for X/base where base in [0, 36].
1028 // However, note that lookups for base in [0, 1] should never happen because
1029 // base has been validated to be in [2, 36] by safe_parse_sign_and_base().
1030 #define X_OVER_BASE_INITIALIZER(X)                                        \
1031   {                                                                       \
1032     0, 0, X / 2, X / 3, X / 4, X / 5, X / 6, X / 7, X / 8, X / 9, X / 10, \
1033         X / 11, X / 12, X / 13, X / 14, X / 15, X / 16, X / 17, X / 18,   \
1034         X / 19, X / 20, X / 21, X / 22, X / 23, X / 24, X / 25, X / 26,   \
1035         X / 27, X / 28, X / 29, X / 30, X / 31, X / 32, X / 33, X / 34,   \
1036         X / 35, X / 36,                                                   \
1037   }
1038 
1039 // This kVmaxOverBase is generated with
1040 //  for (int base = 2; base < 37; ++base) {
1041 //    absl::uint128 max = std::numeric_limits<absl::uint128>::max();
1042 //    auto result = max / base;
1043 //    std::cout << "    MakeUint128(" << absl::Uint128High64(result) << "u, "
1044 //              << absl::Uint128Low64(result) << "u),\n";
1045 //  }
1046 // See https://godbolt.org/z/aneYsb
1047 //
1048 // uint128& operator/=(uint128) is not constexpr, so hardcode the resulting
1049 // array to avoid a static initializer.
1050 template <>
1051 ABSL_CONST_INIT const uint128 LookupTables<uint128>::kVmaxOverBase[] = {
1052     0,
1053     0,
1054     MakeUint128(9223372036854775807u, 18446744073709551615u),
1055     MakeUint128(6148914691236517205u, 6148914691236517205u),
1056     MakeUint128(4611686018427387903u, 18446744073709551615u),
1057     MakeUint128(3689348814741910323u, 3689348814741910323u),
1058     MakeUint128(3074457345618258602u, 12297829382473034410u),
1059     MakeUint128(2635249153387078802u, 5270498306774157604u),
1060     MakeUint128(2305843009213693951u, 18446744073709551615u),
1061     MakeUint128(2049638230412172401u, 14347467612885206812u),
1062     MakeUint128(1844674407370955161u, 11068046444225730969u),
1063     MakeUint128(1676976733973595601u, 8384883669867978007u),
1064     MakeUint128(1537228672809129301u, 6148914691236517205u),
1065     MakeUint128(1418980313362273201u, 4256940940086819603u),
1066     MakeUint128(1317624576693539401u, 2635249153387078802u),
1067     MakeUint128(1229782938247303441u, 1229782938247303441u),
1068     MakeUint128(1152921504606846975u, 18446744073709551615u),
1069     MakeUint128(1085102592571150095u, 1085102592571150095u),
1070     MakeUint128(1024819115206086200u, 16397105843297379214u),
1071     MakeUint128(970881267037344821u, 16504981539634861972u),
1072     MakeUint128(922337203685477580u, 14757395258967641292u),
1073     MakeUint128(878416384462359600u, 14054662151397753612u),
1074     MakeUint128(838488366986797800u, 13415813871788764811u),
1075     MakeUint128(802032351030850070u, 4812194106185100421u),
1076     MakeUint128(768614336404564650u, 12297829382473034410u),
1077     MakeUint128(737869762948382064u, 11805916207174113034u),
1078     MakeUint128(709490156681136600u, 11351842506898185609u),
1079     MakeUint128(683212743470724133u, 17080318586768103348u),
1080     MakeUint128(658812288346769700u, 10540996613548315209u),
1081     MakeUint128(636094623231363848u, 15266270957552732371u),
1082     MakeUint128(614891469123651720u, 9838263505978427528u),
1083     MakeUint128(595056260442243600u, 9520900167075897608u),
1084     MakeUint128(576460752303423487u, 18446744073709551615u),
1085     MakeUint128(558992244657865200u, 8943875914525843207u),
1086     MakeUint128(542551296285575047u, 9765923333140350855u),
1087     MakeUint128(527049830677415760u, 8432797290838652167u),
1088     MakeUint128(512409557603043100u, 8198552921648689607u),
1089 };
1090 
1091 // This kVmaxOverBase generated with
1092 //   for (int base = 2; base < 37; ++base) {
1093 //    absl::int128 max = std::numeric_limits<absl::int128>::max();
1094 //    auto result = max / base;
1095 //    std::cout << "\tMakeInt128(" << absl::Int128High64(result) << ", "
1096 //              << absl::Int128Low64(result) << "u),\n";
1097 //  }
1098 // See https://godbolt.org/z/7djYWz
1099 //
1100 // int128& operator/=(int128) is not constexpr, so hardcode the resulting array
1101 // to avoid a static initializer.
1102 template <>
1103 ABSL_CONST_INIT const int128 LookupTables<int128>::kVmaxOverBase[] = {
1104     0,
1105     0,
1106     MakeInt128(4611686018427387903, 18446744073709551615u),
1107     MakeInt128(3074457345618258602, 12297829382473034410u),
1108     MakeInt128(2305843009213693951, 18446744073709551615u),
1109     MakeInt128(1844674407370955161, 11068046444225730969u),
1110     MakeInt128(1537228672809129301, 6148914691236517205u),
1111     MakeInt128(1317624576693539401, 2635249153387078802u),
1112     MakeInt128(1152921504606846975, 18446744073709551615u),
1113     MakeInt128(1024819115206086200, 16397105843297379214u),
1114     MakeInt128(922337203685477580, 14757395258967641292u),
1115     MakeInt128(838488366986797800, 13415813871788764811u),
1116     MakeInt128(768614336404564650, 12297829382473034410u),
1117     MakeInt128(709490156681136600, 11351842506898185609u),
1118     MakeInt128(658812288346769700, 10540996613548315209u),
1119     MakeInt128(614891469123651720, 9838263505978427528u),
1120     MakeInt128(576460752303423487, 18446744073709551615u),
1121     MakeInt128(542551296285575047, 9765923333140350855u),
1122     MakeInt128(512409557603043100, 8198552921648689607u),
1123     MakeInt128(485440633518672410, 17475862806672206794u),
1124     MakeInt128(461168601842738790, 7378697629483820646u),
1125     MakeInt128(439208192231179800, 7027331075698876806u),
1126     MakeInt128(419244183493398900, 6707906935894382405u),
1127     MakeInt128(401016175515425035, 2406097053092550210u),
1128     MakeInt128(384307168202282325, 6148914691236517205u),
1129     MakeInt128(368934881474191032, 5902958103587056517u),
1130     MakeInt128(354745078340568300, 5675921253449092804u),
1131     MakeInt128(341606371735362066, 17763531330238827482u),
1132     MakeInt128(329406144173384850, 5270498306774157604u),
1133     MakeInt128(318047311615681924, 7633135478776366185u),
1134     MakeInt128(307445734561825860, 4919131752989213764u),
1135     MakeInt128(297528130221121800, 4760450083537948804u),
1136     MakeInt128(288230376151711743, 18446744073709551615u),
1137     MakeInt128(279496122328932600, 4471937957262921603u),
1138     MakeInt128(271275648142787523, 14106333703424951235u),
1139     MakeInt128(263524915338707880, 4216398645419326083u),
1140     MakeInt128(256204778801521550, 4099276460824344803u),
1141 };
1142 
1143 // This kVminOverBase generated with
1144 //  for (int base = 2; base < 37; ++base) {
1145 //    absl::int128 min = std::numeric_limits<absl::int128>::min();
1146 //    auto result = min / base;
1147 //    std::cout << "\tMakeInt128(" << absl::Int128High64(result) << ", "
1148 //              << absl::Int128Low64(result) << "u),\n";
1149 //  }
1150 //
1151 // See https://godbolt.org/z/7djYWz
1152 //
1153 // int128& operator/=(int128) is not constexpr, so hardcode the resulting array
1154 // to avoid a static initializer.
1155 template <>
1156 ABSL_CONST_INIT const int128 LookupTables<int128>::kVminOverBase[] = {
1157     0,
1158     0,
1159     MakeInt128(-4611686018427387904, 0u),
1160     MakeInt128(-3074457345618258603, 6148914691236517206u),
1161     MakeInt128(-2305843009213693952, 0u),
1162     MakeInt128(-1844674407370955162, 7378697629483820647u),
1163     MakeInt128(-1537228672809129302, 12297829382473034411u),
1164     MakeInt128(-1317624576693539402, 15811494920322472814u),
1165     MakeInt128(-1152921504606846976, 0u),
1166     MakeInt128(-1024819115206086201, 2049638230412172402u),
1167     MakeInt128(-922337203685477581, 3689348814741910324u),
1168     MakeInt128(-838488366986797801, 5030930201920786805u),
1169     MakeInt128(-768614336404564651, 6148914691236517206u),
1170     MakeInt128(-709490156681136601, 7094901566811366007u),
1171     MakeInt128(-658812288346769701, 7905747460161236407u),
1172     MakeInt128(-614891469123651721, 8608480567731124088u),
1173     MakeInt128(-576460752303423488, 0u),
1174     MakeInt128(-542551296285575048, 8680820740569200761u),
1175     MakeInt128(-512409557603043101, 10248191152060862009u),
1176     MakeInt128(-485440633518672411, 970881267037344822u),
1177     MakeInt128(-461168601842738791, 11068046444225730970u),
1178     MakeInt128(-439208192231179801, 11419412998010674810u),
1179     MakeInt128(-419244183493398901, 11738837137815169211u),
1180     MakeInt128(-401016175515425036, 16040647020617001406u),
1181     MakeInt128(-384307168202282326, 12297829382473034411u),
1182     MakeInt128(-368934881474191033, 12543785970122495099u),
1183     MakeInt128(-354745078340568301, 12770822820260458812u),
1184     MakeInt128(-341606371735362067, 683212743470724134u),
1185     MakeInt128(-329406144173384851, 13176245766935394012u),
1186     MakeInt128(-318047311615681925, 10813608594933185431u),
1187     MakeInt128(-307445734561825861, 13527612320720337852u),
1188     MakeInt128(-297528130221121801, 13686293990171602812u),
1189     MakeInt128(-288230376151711744, 0u),
1190     MakeInt128(-279496122328932601, 13974806116446630013u),
1191     MakeInt128(-271275648142787524, 4340410370284600381u),
1192     MakeInt128(-263524915338707881, 14230345428290225533u),
1193     MakeInt128(-256204778801521551, 14347467612885206813u),
1194 };
1195 
1196 template <typename IntType>
1197 ABSL_CONST_INIT const IntType LookupTables<IntType>::kVmaxOverBase[] =
1198     X_OVER_BASE_INITIALIZER(std::numeric_limits<IntType>::max());
1199 
1200 template <typename IntType>
1201 ABSL_CONST_INIT const IntType LookupTables<IntType>::kVminOverBase[] =
1202     X_OVER_BASE_INITIALIZER(std::numeric_limits<IntType>::min());
1203 
1204 #undef X_OVER_BASE_INITIALIZER
1205 
1206 template <typename IntType>
safe_parse_positive_int(absl::string_view text,int base,absl::Nonnull<IntType * > value_p)1207 inline bool safe_parse_positive_int(absl::string_view text, int base,
1208                                     absl::Nonnull<IntType*> value_p) {
1209   IntType value = 0;
1210   const IntType vmax = std::numeric_limits<IntType>::max();
1211   assert(vmax > 0);
1212   assert(base >= 0);
1213   const IntType base_inttype = static_cast<IntType>(base);
1214   assert(vmax >= base_inttype);
1215   const IntType vmax_over_base = LookupTables<IntType>::kVmaxOverBase[base];
1216   assert(base < 2 ||
1217          std::numeric_limits<IntType>::max() / base_inttype == vmax_over_base);
1218   const char* start = text.data();
1219   const char* end = start + text.size();
1220   // loop over digits
1221   for (; start < end; ++start) {
1222     unsigned char c = static_cast<unsigned char>(start[0]);
1223     IntType digit = static_cast<IntType>(kAsciiToInt[c]);
1224     if (digit >= base_inttype) {
1225       *value_p = value;
1226       return false;
1227     }
1228     if (value > vmax_over_base) {
1229       *value_p = vmax;
1230       return false;
1231     }
1232     value *= base_inttype;
1233     if (value > vmax - digit) {
1234       *value_p = vmax;
1235       return false;
1236     }
1237     value += digit;
1238   }
1239   *value_p = value;
1240   return true;
1241 }
1242 
1243 template <typename IntType>
safe_parse_negative_int(absl::string_view text,int base,absl::Nonnull<IntType * > value_p)1244 inline bool safe_parse_negative_int(absl::string_view text, int base,
1245                                     absl::Nonnull<IntType*> value_p) {
1246   IntType value = 0;
1247   const IntType vmin = std::numeric_limits<IntType>::min();
1248   assert(vmin < 0);
1249   assert(vmin <= 0 - base);
1250   IntType vmin_over_base = LookupTables<IntType>::kVminOverBase[base];
1251   assert(base < 2 ||
1252          std::numeric_limits<IntType>::min() / base == vmin_over_base);
1253   // 2003 c++ standard [expr.mul]
1254   // "... the sign of the remainder is implementation-defined."
1255   // Although (vmin/base)*base + vmin%base is always vmin.
1256   // 2011 c++ standard tightens the spec but we cannot rely on it.
1257   // TODO(junyer): Handle this in the lookup table generation.
1258   if (vmin % base > 0) {
1259     vmin_over_base += 1;
1260   }
1261   const char* start = text.data();
1262   const char* end = start + text.size();
1263   // loop over digits
1264   for (; start < end; ++start) {
1265     unsigned char c = static_cast<unsigned char>(start[0]);
1266     int digit = kAsciiToInt[c];
1267     if (digit >= base) {
1268       *value_p = value;
1269       return false;
1270     }
1271     if (value < vmin_over_base) {
1272       *value_p = vmin;
1273       return false;
1274     }
1275     value *= base;
1276     if (value < vmin + digit) {
1277       *value_p = vmin;
1278       return false;
1279     }
1280     value -= digit;
1281   }
1282   *value_p = value;
1283   return true;
1284 }
1285 
1286 // Input format based on POSIX.1-2008 strtol
1287 // http://pubs.opengroup.org/onlinepubs/9699919799/functions/strtol.html
1288 template <typename IntType>
safe_int_internal(absl::string_view text,absl::Nonnull<IntType * > value_p,int base)1289 inline bool safe_int_internal(absl::string_view text,
1290                               absl::Nonnull<IntType*> value_p, int base) {
1291   *value_p = 0;
1292   bool negative;
1293   if (!safe_parse_sign_and_base(&text, &base, &negative)) {
1294     return false;
1295   }
1296   if (!negative) {
1297     return safe_parse_positive_int(text, base, value_p);
1298   } else {
1299     return safe_parse_negative_int(text, base, value_p);
1300   }
1301 }
1302 
1303 template <typename IntType>
safe_uint_internal(absl::string_view text,absl::Nonnull<IntType * > value_p,int base)1304 inline bool safe_uint_internal(absl::string_view text,
1305                                absl::Nonnull<IntType*> value_p, int base) {
1306   *value_p = 0;
1307   bool negative;
1308   if (!safe_parse_sign_and_base(&text, &base, &negative) || negative) {
1309     return false;
1310   }
1311   return safe_parse_positive_int(text, base, value_p);
1312 }
1313 }  // anonymous namespace
1314 
1315 namespace numbers_internal {
1316 
1317 // Digit conversion.
1318 ABSL_CONST_INIT ABSL_DLL const char kHexChar[] =
1319     "0123456789abcdef";
1320 
1321 ABSL_CONST_INIT ABSL_DLL const char kHexTable[513] =
1322     "000102030405060708090a0b0c0d0e0f"
1323     "101112131415161718191a1b1c1d1e1f"
1324     "202122232425262728292a2b2c2d2e2f"
1325     "303132333435363738393a3b3c3d3e3f"
1326     "404142434445464748494a4b4c4d4e4f"
1327     "505152535455565758595a5b5c5d5e5f"
1328     "606162636465666768696a6b6c6d6e6f"
1329     "707172737475767778797a7b7c7d7e7f"
1330     "808182838485868788898a8b8c8d8e8f"
1331     "909192939495969798999a9b9c9d9e9f"
1332     "a0a1a2a3a4a5a6a7a8a9aaabacadaeaf"
1333     "b0b1b2b3b4b5b6b7b8b9babbbcbdbebf"
1334     "c0c1c2c3c4c5c6c7c8c9cacbcccdcecf"
1335     "d0d1d2d3d4d5d6d7d8d9dadbdcdddedf"
1336     "e0e1e2e3e4e5e6e7e8e9eaebecedeeef"
1337     "f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff";
1338 
safe_strto32_base(absl::string_view text,absl::Nonnull<int32_t * > value,int base)1339 bool safe_strto32_base(absl::string_view text, absl::Nonnull<int32_t*> value,
1340                        int base) {
1341   return safe_int_internal<int32_t>(text, value, base);
1342 }
1343 
safe_strto64_base(absl::string_view text,absl::Nonnull<int64_t * > value,int base)1344 bool safe_strto64_base(absl::string_view text, absl::Nonnull<int64_t*> value,
1345                        int base) {
1346   return safe_int_internal<int64_t>(text, value, base);
1347 }
1348 
safe_strto128_base(absl::string_view text,absl::Nonnull<int128 * > value,int base)1349 bool safe_strto128_base(absl::string_view text, absl::Nonnull<int128*> value,
1350                         int base) {
1351   return safe_int_internal<absl::int128>(text, value, base);
1352 }
1353 
safe_strtou32_base(absl::string_view text,absl::Nonnull<uint32_t * > value,int base)1354 bool safe_strtou32_base(absl::string_view text, absl::Nonnull<uint32_t*> value,
1355                         int base) {
1356   return safe_uint_internal<uint32_t>(text, value, base);
1357 }
1358 
safe_strtou64_base(absl::string_view text,absl::Nonnull<uint64_t * > value,int base)1359 bool safe_strtou64_base(absl::string_view text, absl::Nonnull<uint64_t*> value,
1360                         int base) {
1361   return safe_uint_internal<uint64_t>(text, value, base);
1362 }
1363 
safe_strtou128_base(absl::string_view text,absl::Nonnull<uint128 * > value,int base)1364 bool safe_strtou128_base(absl::string_view text, absl::Nonnull<uint128*> value,
1365                          int base) {
1366   return safe_uint_internal<absl::uint128>(text, value, base);
1367 }
1368 
1369 }  // namespace numbers_internal
1370 ABSL_NAMESPACE_END
1371 }  // namespace absl
1372