1 // Copyright 2017 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 // This file contains string processing functions related to
16 // numeric values.
17
18 #include "absl/strings/numbers.h"
19
20 #include <algorithm>
21 #include <cassert>
22 #include <cfloat> // for DBL_DIG and FLT_DIG
23 #include <climits>
24 #include <cmath> // for HUGE_VAL
25 #include <cstddef>
26 #include <cstdint>
27 #include <cstdio>
28 #include <cstdlib>
29 #include <cstring>
30 #include <iterator>
31 #include <limits>
32 #include <system_error> // NOLINT(build/c++11)
33 #include <type_traits>
34 #include <utility>
35
36 #include "absl/base/attributes.h"
37 #include "absl/base/config.h"
38 #include "absl/base/internal/endian.h"
39 #include "absl/base/internal/raw_logging.h"
40 #include "absl/base/nullability.h"
41 #include "absl/base/optimization.h"
42 #include "absl/numeric/bits.h"
43 #include "absl/numeric/int128.h"
44 #include "absl/strings/ascii.h"
45 #include "absl/strings/charconv.h"
46 #include "absl/strings/match.h"
47 #include "absl/strings/string_view.h"
48
49 namespace absl {
50 ABSL_NAMESPACE_BEGIN
51
SimpleAtof(absl::string_view str,absl::Nonnull<float * > out)52 bool SimpleAtof(absl::string_view str, absl::Nonnull<float*> out) {
53 *out = 0.0;
54 str = StripAsciiWhitespace(str);
55 // std::from_chars doesn't accept an initial +, but SimpleAtof does, so if one
56 // is present, skip it, while avoiding accepting "+-0" as valid.
57 if (!str.empty() && str[0] == '+') {
58 str.remove_prefix(1);
59 if (!str.empty() && str[0] == '-') {
60 return false;
61 }
62 }
63 auto result = absl::from_chars(str.data(), str.data() + str.size(), *out);
64 if (result.ec == std::errc::invalid_argument) {
65 return false;
66 }
67 if (result.ptr != str.data() + str.size()) {
68 // not all non-whitespace characters consumed
69 return false;
70 }
71 // from_chars() with DR 3081's current wording will return max() on
72 // overflow. SimpleAtof returns infinity instead.
73 if (result.ec == std::errc::result_out_of_range) {
74 if (*out > 1.0) {
75 *out = std::numeric_limits<float>::infinity();
76 } else if (*out < -1.0) {
77 *out = -std::numeric_limits<float>::infinity();
78 }
79 }
80 return true;
81 }
82
SimpleAtod(absl::string_view str,absl::Nonnull<double * > out)83 bool SimpleAtod(absl::string_view str, absl::Nonnull<double*> out) {
84 *out = 0.0;
85 str = StripAsciiWhitespace(str);
86 // std::from_chars doesn't accept an initial +, but SimpleAtod does, so if one
87 // is present, skip it, while avoiding accepting "+-0" as valid.
88 if (!str.empty() && str[0] == '+') {
89 str.remove_prefix(1);
90 if (!str.empty() && str[0] == '-') {
91 return false;
92 }
93 }
94 auto result = absl::from_chars(str.data(), str.data() + str.size(), *out);
95 if (result.ec == std::errc::invalid_argument) {
96 return false;
97 }
98 if (result.ptr != str.data() + str.size()) {
99 // not all non-whitespace characters consumed
100 return false;
101 }
102 // from_chars() with DR 3081's current wording will return max() on
103 // overflow. SimpleAtod returns infinity instead.
104 if (result.ec == std::errc::result_out_of_range) {
105 if (*out > 1.0) {
106 *out = std::numeric_limits<double>::infinity();
107 } else if (*out < -1.0) {
108 *out = -std::numeric_limits<double>::infinity();
109 }
110 }
111 return true;
112 }
113
SimpleAtob(absl::string_view str,absl::Nonnull<bool * > out)114 bool SimpleAtob(absl::string_view str, absl::Nonnull<bool*> out) {
115 ABSL_RAW_CHECK(out != nullptr, "Output pointer must not be nullptr.");
116 if (EqualsIgnoreCase(str, "true") || EqualsIgnoreCase(str, "t") ||
117 EqualsIgnoreCase(str, "yes") || EqualsIgnoreCase(str, "y") ||
118 EqualsIgnoreCase(str, "1")) {
119 *out = true;
120 return true;
121 }
122 if (EqualsIgnoreCase(str, "false") || EqualsIgnoreCase(str, "f") ||
123 EqualsIgnoreCase(str, "no") || EqualsIgnoreCase(str, "n") ||
124 EqualsIgnoreCase(str, "0")) {
125 *out = false;
126 return true;
127 }
128 return false;
129 }
130
131 // ----------------------------------------------------------------------
132 // FastIntToBuffer() overloads
133 //
134 // Like the Fast*ToBuffer() functions above, these are intended for speed.
135 // Unlike the Fast*ToBuffer() functions, however, these functions write
136 // their output to the beginning of the buffer. The caller is responsible
137 // for ensuring that the buffer has enough space to hold the output.
138 //
139 // Returns a pointer to the end of the string (i.e. the null character
140 // terminating the string).
141 // ----------------------------------------------------------------------
142
143 namespace {
144
145 // Various routines to encode integers to strings.
146
147 // We split data encodings into a group of 2 digits, 4 digits, 8 digits as
148 // it's easier to combine powers of two into scalar arithmetic.
149
150 // Previous implementation used a lookup table of 200 bytes for every 2 bytes
151 // and it was memory bound, any L1 cache miss would result in a much slower
152 // result. When benchmarking with a cache eviction rate of several percent,
153 // this implementation proved to be better.
154
155 // These constants represent '00', '0000' and '00000000' as ascii strings in
156 // integers. We can add these numbers if we encode to bytes from 0 to 9. as
157 // 'i' = '0' + i for 0 <= i <= 9.
158 constexpr uint32_t kTwoZeroBytes = 0x0101 * '0';
159 constexpr uint64_t kFourZeroBytes = 0x01010101 * '0';
160 constexpr uint64_t kEightZeroBytes = 0x0101010101010101ull * '0';
161
162 template <typename T>
Pow(T base,uint32_t n)163 constexpr T Pow(T base, uint32_t n) {
164 // Exponentiation by squaring
165 return static_cast<T>((n > 1 ? Pow(base * base, n >> 1) : static_cast<T>(1)) *
166 ((n & 1) ? base : static_cast<T>(1)));
167 }
168
169 // Given n, calculates C where the following holds for all 0 <= x < Pow(100, n):
170 // x / Pow(10, n) == x * C / Pow(2, n * 10)
171 // In other words, it allows us to divide by a power of 10 via a single
172 // multiplication and bit shifts, assuming the input will be smaller than the
173 // square of that power of 10.
174 template <typename T>
ComputePowerOf100DivisionCoefficient(uint32_t n)175 constexpr T ComputePowerOf100DivisionCoefficient(uint32_t n) {
176 if (n > 4) {
177 // This doesn't work for large powers of 100, due to overflow
178 abort();
179 }
180 T denom = 16 - 1;
181 T num = (denom + 1) - 10;
182 T gcd = 3; // Greatest common divisor of numerator and denominator
183 denom = Pow(denom / gcd, n);
184 num = Pow(num / gcd, 9 * n);
185 T quotient = num / denom;
186 if (num % denom >= denom / 2) {
187 // Round up, since the remainder is more than half the denominator
188 ++quotient;
189 }
190 return quotient;
191 }
192
193 // * kDivisionBy10Mul / kDivisionBy10Div is a division by 10 for values from 0
194 // to 99. It's also a division of a structure [k takes 2 bytes][m takes 2
195 // bytes], then * kDivisionBy10Mul / kDivisionBy10Div will be [k / 10][m / 10].
196 // It allows parallel division.
197 constexpr uint64_t kDivisionBy10Mul =
198 ComputePowerOf100DivisionCoefficient<uint64_t>(1);
199 static_assert(kDivisionBy10Mul == 103,
200 "division coefficient for 10 is incorrect");
201 constexpr uint64_t kDivisionBy10Div = 1 << 10;
202
203 // * kDivisionBy100Mul / kDivisionBy100Div is a division by 100 for values from
204 // 0 to 9999.
205 constexpr uint64_t kDivisionBy100Mul =
206 ComputePowerOf100DivisionCoefficient<uint64_t>(2);
207 static_assert(kDivisionBy100Mul == 10486,
208 "division coefficient for 100 is incorrect");
209 constexpr uint64_t kDivisionBy100Div = 1 << 20;
210
211 static_assert(ComputePowerOf100DivisionCoefficient<uint64_t>(3) == 1073742,
212 "division coefficient for 1000 is incorrect");
213
214 // Same as `PrepareEightDigits`, but produces 2 digits for integers < 100.
PrepareTwoDigitsImpl(uint32_t i,bool reversed)215 inline uint32_t PrepareTwoDigitsImpl(uint32_t i, bool reversed) {
216 assert(i < 100);
217 uint32_t div10 = (i * kDivisionBy10Mul) / kDivisionBy10Div;
218 uint32_t mod10 = i - 10u * div10;
219 return (div10 << (reversed ? 8 : 0)) + (mod10 << (reversed ? 0 : 8));
220 }
PrepareTwoDigits(uint32_t i)221 inline uint32_t PrepareTwoDigits(uint32_t i) {
222 return PrepareTwoDigitsImpl(i, false);
223 }
224
225 // Same as `PrepareEightDigits`, but produces 4 digits for integers < 10000.
PrepareFourDigitsImpl(uint32_t n,bool reversed)226 inline uint32_t PrepareFourDigitsImpl(uint32_t n, bool reversed) {
227 // We split lower 2 digits and upper 2 digits of n into 2 byte consecutive
228 // blocks. 123 -> [\0\1][\0\23]. We divide by 10 both blocks
229 // (it's 1 division + zeroing upper bits), and compute modulo 10 as well "in
230 // parallel". Then we combine both results to have both ASCII digits,
231 // strip trailing zeros, add ASCII '0000' and return.
232 uint32_t div100 = (n * kDivisionBy100Mul) / kDivisionBy100Div;
233 uint32_t mod100 = n - 100ull * div100;
234 uint32_t hundreds =
235 (mod100 << (reversed ? 0 : 16)) + (div100 << (reversed ? 16 : 0));
236 uint32_t tens = (hundreds * kDivisionBy10Mul) / kDivisionBy10Div;
237 tens &= (0xFull << 16) | 0xFull;
238 tens = (tens << (reversed ? 8 : 0)) +
239 static_cast<uint32_t>((hundreds - 10ull * tens) << (reversed ? 0 : 8));
240 return tens;
241 }
PrepareFourDigits(uint32_t n)242 inline uint32_t PrepareFourDigits(uint32_t n) {
243 return PrepareFourDigitsImpl(n, false);
244 }
PrepareFourDigitsReversed(uint32_t n)245 inline uint32_t PrepareFourDigitsReversed(uint32_t n) {
246 return PrepareFourDigitsImpl(n, true);
247 }
248
249 // Helper function to produce an ASCII representation of `i`.
250 //
251 // Function returns an 8-byte integer which when summed with `kEightZeroBytes`,
252 // can be treated as a printable buffer with ascii representation of `i`,
253 // possibly with leading zeros.
254 //
255 // Example:
256 //
257 // uint64_t buffer = PrepareEightDigits(102030) + kEightZeroBytes;
258 // char* ascii = reinterpret_cast<char*>(&buffer);
259 // // Note two leading zeros:
260 // EXPECT_EQ(absl::string_view(ascii, 8), "00102030");
261 //
262 // If `Reversed` is set to true, the result becomes reversed to "03020100".
263 //
264 // Pre-condition: `i` must be less than 100000000.
PrepareEightDigitsImpl(uint32_t i,bool reversed)265 inline uint64_t PrepareEightDigitsImpl(uint32_t i, bool reversed) {
266 ABSL_ASSUME(i < 10000'0000);
267 // Prepare 2 blocks of 4 digits "in parallel".
268 uint32_t hi = i / 10000;
269 uint32_t lo = i % 10000;
270 uint64_t merged = (uint64_t{hi} << (reversed ? 32 : 0)) |
271 (uint64_t{lo} << (reversed ? 0 : 32));
272 uint64_t div100 = ((merged * kDivisionBy100Mul) / kDivisionBy100Div) &
273 ((0x7Full << 32) | 0x7Full);
274 uint64_t mod100 = merged - 100ull * div100;
275 uint64_t hundreds =
276 (mod100 << (reversed ? 0 : 16)) + (div100 << (reversed ? 16 : 0));
277 uint64_t tens = (hundreds * kDivisionBy10Mul) / kDivisionBy10Div;
278 tens &= (0xFull << 48) | (0xFull << 32) | (0xFull << 16) | 0xFull;
279 tens = (tens << (reversed ? 8 : 0)) +
280 ((hundreds - 10ull * tens) << (reversed ? 0 : 8));
281 return tens;
282 }
PrepareEightDigits(uint32_t i)283 inline uint64_t PrepareEightDigits(uint32_t i) {
284 return PrepareEightDigitsImpl(i, false);
285 }
PrepareEightDigitsReversed(uint32_t i)286 inline uint64_t PrepareEightDigitsReversed(uint32_t i) {
287 return PrepareEightDigitsImpl(i, true);
288 }
289
290 template <typename T, typename BackwardIt>
291 class FastUIntToStringConverter {
292 static_assert(
293 std::is_same<T, decltype(+std::declval<T>())>::value,
294 "to avoid code bloat, only instantiate this for int and larger types");
295 static_assert(std::is_unsigned<T>::value,
296 "this class is only for unsigned types");
297
298 public:
299 // Outputs the given number backward (like with std::copy_backward),
300 // starting from the end of the string.
301 // The number of digits in the number must have been already measured and
302 // passed *exactly*, otherwise the behavior is undefined.
303 // (This is an optimization, as calculating the number of digits again would
304 // slow down the hot path.)
305 // Returns an iterator to the start of the suffix that was appended.
FastIntToBufferBackward(T v,BackwardIt end)306 static BackwardIt FastIntToBufferBackward(T v, BackwardIt end) {
307 // THIS IS A HOT FUNCTION with a very deliberate structure to exploit branch
308 // prediction and shorten the critical path for smaller numbers.
309 // Do not move around the if/else blocks or attempt to simplify it
310 // without benchmarking any changes.
311
312 if (v < 10) {
313 goto AT_LEAST_1 /* NOTE: mandatory for the 0 case */;
314 }
315 if (v < 1000) {
316 goto AT_LEAST_10;
317 }
318 if (v < 10000000) {
319 goto AT_LEAST_1000;
320 }
321
322 if (v >= 100000000 / 10) {
323 if (v >= 10000000000000000 / 10) {
324 DoFastIntToBufferBackward<8>(v, end);
325 }
326 DoFastIntToBufferBackward<8>(v, end);
327 }
328
329 if (v >= 10000 / 10) {
330 AT_LEAST_1000:
331 DoFastIntToBufferBackward<4>(v, end);
332 }
333
334 if (v >= 100 / 10) {
335 AT_LEAST_10:
336 DoFastIntToBufferBackward<2>(v, end);
337 }
338
339 if (v >= 10 / 10) {
340 AT_LEAST_1:
341 end = DoFastIntToBufferBackward(v, end, std::integral_constant<int, 1>());
342 }
343 return end;
344 }
345
346 private:
347 // Only assume pointers are contiguous for now. String and vector iterators
348 // could be special-cased as well, but there's no need for them here.
349 // With C++20 we can probably switch to std::contiguous_iterator_tag.
350 static constexpr bool kIsContiguousIterator =
351 std::is_pointer<BackwardIt>::value;
352
353 template <int Exponent>
DoFastIntToBufferBackward(T & v,BackwardIt & end)354 static void DoFastIntToBufferBackward(T& v, BackwardIt& end) {
355 constexpr T kModulus = Pow<T>(10, Exponent);
356 T remainder = static_cast<T>(v % kModulus);
357 v = static_cast<T>(v / kModulus);
358 end = DoFastIntToBufferBackward(remainder, end,
359 std::integral_constant<int, Exponent>());
360 }
361
DoFastIntToBufferBackward(const T &,BackwardIt end,std::integral_constant<int,0>)362 static BackwardIt DoFastIntToBufferBackward(const T&, BackwardIt end,
363 std::integral_constant<int, 0>) {
364 return end;
365 }
366
DoFastIntToBufferBackward(T v,BackwardIt end,std::integral_constant<int,1>)367 static BackwardIt DoFastIntToBufferBackward(T v, BackwardIt end,
368 std::integral_constant<int, 1>) {
369 *--end = static_cast<char>('0' + v);
370 return DoFastIntToBufferBackward(v, end, std::integral_constant<int, 0>());
371 }
372
DoFastIntToBufferBackward(T v,BackwardIt end,std::integral_constant<int,4>)373 static BackwardIt DoFastIntToBufferBackward(T v, BackwardIt end,
374 std::integral_constant<int, 4>) {
375 if (kIsContiguousIterator) {
376 const uint32_t digits =
377 PrepareFourDigits(static_cast<uint32_t>(v)) + kFourZeroBytes;
378 end -= sizeof(digits);
379 little_endian::Store32(&*end, digits);
380 } else {
381 uint32_t digits =
382 PrepareFourDigitsReversed(static_cast<uint32_t>(v)) + kFourZeroBytes;
383 for (size_t i = 0; i < sizeof(digits); ++i) {
384 *--end = static_cast<char>(digits);
385 digits >>= CHAR_BIT;
386 }
387 }
388 return end;
389 }
390
DoFastIntToBufferBackward(T v,BackwardIt end,std::integral_constant<int,8>)391 static BackwardIt DoFastIntToBufferBackward(T v, BackwardIt end,
392 std::integral_constant<int, 8>) {
393 if (kIsContiguousIterator) {
394 const uint64_t digits =
395 PrepareEightDigits(static_cast<uint32_t>(v)) + kEightZeroBytes;
396 end -= sizeof(digits);
397 little_endian::Store64(&*end, digits);
398 } else {
399 uint64_t digits = PrepareEightDigitsReversed(static_cast<uint32_t>(v)) +
400 kEightZeroBytes;
401 for (size_t i = 0; i < sizeof(digits); ++i) {
402 *--end = static_cast<char>(digits);
403 digits >>= CHAR_BIT;
404 }
405 }
406 return end;
407 }
408
409 template <int Digits>
DoFastIntToBufferBackward(T v,BackwardIt end,std::integral_constant<int,Digits>)410 static BackwardIt DoFastIntToBufferBackward(
411 T v, BackwardIt end, std::integral_constant<int, Digits>) {
412 constexpr int kLogModulus = Digits - Digits / 2;
413 constexpr T kModulus = Pow(static_cast<T>(10), kLogModulus);
414 bool is_safe_to_use_division_trick = Digits <= 8;
415 T quotient, remainder;
416 if (is_safe_to_use_division_trick) {
417 constexpr uint64_t kCoefficient =
418 ComputePowerOf100DivisionCoefficient<uint64_t>(kLogModulus);
419 quotient = (v * kCoefficient) >> (10 * kLogModulus);
420 remainder = v - quotient * kModulus;
421 } else {
422 quotient = v / kModulus;
423 remainder = v % kModulus;
424 }
425 end = DoFastIntToBufferBackward(remainder, end,
426 std::integral_constant<int, kLogModulus>());
427 return DoFastIntToBufferBackward(
428 quotient, end, std::integral_constant<int, Digits - kLogModulus>());
429 }
430 };
431
432 // Returns an iterator to the start of the suffix that was appended
433 template <typename T, typename BackwardIt>
434 std::enable_if_t<std::is_unsigned<T>::value, BackwardIt>
DoFastIntToBufferBackward(T v,BackwardIt end,uint32_t digits)435 DoFastIntToBufferBackward(T v, BackwardIt end, uint32_t digits) {
436 using PromotedT = std::decay_t<decltype(+v)>;
437 using Converter = FastUIntToStringConverter<PromotedT, BackwardIt>;
438 (void)digits;
439 return Converter().FastIntToBufferBackward(v, end);
440 }
441
442 template <typename T, typename BackwardIt>
443 std::enable_if_t<std::is_signed<T>::value, BackwardIt>
DoFastIntToBufferBackward(T v,BackwardIt end,uint32_t digits)444 DoFastIntToBufferBackward(T v, BackwardIt end, uint32_t digits) {
445 if (absl::numbers_internal::IsNegative(v)) {
446 // Store the minus sign *before* we produce the number itself, not after.
447 // This gets us a tail call.
448 end[-static_cast<ptrdiff_t>(digits) - 1] = '-';
449 }
450 return DoFastIntToBufferBackward(
451 absl::numbers_internal::UnsignedAbsoluteValue(v), end, digits);
452 }
453
454 template <class T>
455 std::enable_if_t<std::is_integral<T>::value, int>
GetNumDigitsOrNegativeIfNegativeImpl(T v)456 GetNumDigitsOrNegativeIfNegativeImpl(T v) {
457 const auto /* either bool or std::false_type */ is_negative =
458 absl::numbers_internal::IsNegative(v);
459 const int digits = static_cast<int>(absl::numbers_internal::Base10Digits(
460 absl::numbers_internal::UnsignedAbsoluteValue(v)));
461 return is_negative ? ~digits : digits;
462 }
463
464 } // namespace
465
PutTwoDigits(uint32_t i,absl::Nonnull<char * > buf)466 void numbers_internal::PutTwoDigits(uint32_t i, absl::Nonnull<char*> buf) {
467 little_endian::Store16(
468 buf, static_cast<uint16_t>(PrepareTwoDigits(i) + kTwoZeroBytes));
469 }
470
FastIntToBuffer(uint32_t i,absl::Nonnull<char * > buffer)471 absl::Nonnull<char*> numbers_internal::FastIntToBuffer(
472 uint32_t i, absl::Nonnull<char*> buffer) {
473 const uint32_t digits = absl::numbers_internal::Base10Digits(i);
474 buffer += digits;
475 *buffer = '\0'; // We're going backward, so store this first
476 FastIntToBufferBackward(i, buffer, digits);
477 return buffer;
478 }
479
FastIntToBuffer(int32_t i,absl::Nonnull<char * > buffer)480 absl::Nonnull<char*> numbers_internal::FastIntToBuffer(
481 int32_t i, absl::Nonnull<char*> buffer) {
482 buffer += static_cast<int>(i < 0);
483 uint32_t digits = absl::numbers_internal::Base10Digits(
484 absl::numbers_internal::UnsignedAbsoluteValue(i));
485 buffer += digits;
486 *buffer = '\0'; // We're going backward, so store this first
487 FastIntToBufferBackward(i, buffer, digits);
488 return buffer;
489 }
490
FastIntToBuffer(uint64_t i,absl::Nonnull<char * > buffer)491 absl::Nonnull<char*> numbers_internal::FastIntToBuffer(
492 uint64_t i, absl::Nonnull<char*> buffer) {
493 uint32_t digits = absl::numbers_internal::Base10Digits(i);
494 buffer += digits;
495 *buffer = '\0'; // We're going backward, so store this first
496 FastIntToBufferBackward(i, buffer, digits);
497 return buffer;
498 }
499
FastIntToBuffer(int64_t i,absl::Nonnull<char * > buffer)500 absl::Nonnull<char*> numbers_internal::FastIntToBuffer(
501 int64_t i, absl::Nonnull<char*> buffer) {
502 buffer += static_cast<int>(i < 0);
503 uint32_t digits = absl::numbers_internal::Base10Digits(
504 absl::numbers_internal::UnsignedAbsoluteValue(i));
505 buffer += digits;
506 *buffer = '\0'; // We're going backward, so store this first
507 FastIntToBufferBackward(i, buffer, digits);
508 return buffer;
509 }
510
FastIntToBufferBackward(uint32_t i,absl::Nonnull<char * > buffer_end,uint32_t exact_digit_count)511 absl::Nonnull<char*> numbers_internal::FastIntToBufferBackward(
512 uint32_t i, absl::Nonnull<char*> buffer_end, uint32_t exact_digit_count) {
513 return DoFastIntToBufferBackward(i, buffer_end, exact_digit_count);
514 }
515
FastIntToBufferBackward(int32_t i,absl::Nonnull<char * > buffer_end,uint32_t exact_digit_count)516 absl::Nonnull<char*> numbers_internal::FastIntToBufferBackward(
517 int32_t i, absl::Nonnull<char*> buffer_end, uint32_t exact_digit_count) {
518 return DoFastIntToBufferBackward(i, buffer_end, exact_digit_count);
519 }
520
FastIntToBufferBackward(uint64_t i,absl::Nonnull<char * > buffer_end,uint32_t exact_digit_count)521 absl::Nonnull<char*> numbers_internal::FastIntToBufferBackward(
522 uint64_t i, absl::Nonnull<char*> buffer_end, uint32_t exact_digit_count) {
523 return DoFastIntToBufferBackward(i, buffer_end, exact_digit_count);
524 }
525
FastIntToBufferBackward(int64_t i,absl::Nonnull<char * > buffer_end,uint32_t exact_digit_count)526 absl::Nonnull<char*> numbers_internal::FastIntToBufferBackward(
527 int64_t i, absl::Nonnull<char*> buffer_end, uint32_t exact_digit_count) {
528 return DoFastIntToBufferBackward(i, buffer_end, exact_digit_count);
529 }
530
GetNumDigitsOrNegativeIfNegative(signed char v)531 int numbers_internal::GetNumDigitsOrNegativeIfNegative(signed char v) {
532 return GetNumDigitsOrNegativeIfNegativeImpl(v);
533 }
GetNumDigitsOrNegativeIfNegative(unsigned char v)534 int numbers_internal::GetNumDigitsOrNegativeIfNegative(unsigned char v) {
535 return GetNumDigitsOrNegativeIfNegativeImpl(v);
536 }
GetNumDigitsOrNegativeIfNegative(short v)537 int numbers_internal::GetNumDigitsOrNegativeIfNegative(short v) { // NOLINT
538 return GetNumDigitsOrNegativeIfNegativeImpl(v);
539 }
GetNumDigitsOrNegativeIfNegative(unsigned short v)540 int numbers_internal::GetNumDigitsOrNegativeIfNegative(
541 unsigned short v) { // NOLINT
542 return GetNumDigitsOrNegativeIfNegativeImpl(v);
543 }
GetNumDigitsOrNegativeIfNegative(int v)544 int numbers_internal::GetNumDigitsOrNegativeIfNegative(int v) {
545 return GetNumDigitsOrNegativeIfNegativeImpl(v);
546 }
GetNumDigitsOrNegativeIfNegative(unsigned int v)547 int numbers_internal::GetNumDigitsOrNegativeIfNegative(unsigned int v) {
548 return GetNumDigitsOrNegativeIfNegativeImpl(v);
549 }
GetNumDigitsOrNegativeIfNegative(long v)550 int numbers_internal::GetNumDigitsOrNegativeIfNegative(long v) { // NOLINT
551 return GetNumDigitsOrNegativeIfNegativeImpl(v);
552 }
GetNumDigitsOrNegativeIfNegative(unsigned long v)553 int numbers_internal::GetNumDigitsOrNegativeIfNegative(
554 unsigned long v) { // NOLINT
555 return GetNumDigitsOrNegativeIfNegativeImpl(v);
556 }
GetNumDigitsOrNegativeIfNegative(long long v)557 int numbers_internal::GetNumDigitsOrNegativeIfNegative(long long v) { // NOLINT
558 return GetNumDigitsOrNegativeIfNegativeImpl(v);
559 }
GetNumDigitsOrNegativeIfNegative(unsigned long long v)560 int numbers_internal::GetNumDigitsOrNegativeIfNegative(
561 unsigned long long v) { // NOLINT
562 return GetNumDigitsOrNegativeIfNegativeImpl(v);
563 }
564
565 // Given a 128-bit number expressed as a pair of uint64_t, high half first,
566 // return that number multiplied by the given 32-bit value. If the result is
567 // too large to fit in a 128-bit number, divide it by 2 until it fits.
Mul32(std::pair<uint64_t,uint64_t> num,uint32_t mul)568 static std::pair<uint64_t, uint64_t> Mul32(std::pair<uint64_t, uint64_t> num,
569 uint32_t mul) {
570 uint64_t bits0_31 = num.second & 0xFFFFFFFF;
571 uint64_t bits32_63 = num.second >> 32;
572 uint64_t bits64_95 = num.first & 0xFFFFFFFF;
573 uint64_t bits96_127 = num.first >> 32;
574
575 // The picture so far: each of these 64-bit values has only the lower 32 bits
576 // filled in.
577 // bits96_127: [ 00000000 xxxxxxxx ]
578 // bits64_95: [ 00000000 xxxxxxxx ]
579 // bits32_63: [ 00000000 xxxxxxxx ]
580 // bits0_31: [ 00000000 xxxxxxxx ]
581
582 bits0_31 *= mul;
583 bits32_63 *= mul;
584 bits64_95 *= mul;
585 bits96_127 *= mul;
586
587 // Now the top halves may also have value, though all 64 of their bits will
588 // never be set at the same time, since they are a result of a 32x32 bit
589 // multiply. This makes the carry calculation slightly easier.
590 // bits96_127: [ mmmmmmmm | mmmmmmmm ]
591 // bits64_95: [ | mmmmmmmm mmmmmmmm | ]
592 // bits32_63: | [ mmmmmmmm | mmmmmmmm ]
593 // bits0_31: | [ | mmmmmmmm mmmmmmmm ]
594 // eventually: [ bits128_up | ...bits64_127.... | ..bits0_63... ]
595
596 uint64_t bits0_63 = bits0_31 + (bits32_63 << 32);
597 uint64_t bits64_127 = bits64_95 + (bits96_127 << 32) + (bits32_63 >> 32) +
598 (bits0_63 < bits0_31);
599 uint64_t bits128_up = (bits96_127 >> 32) + (bits64_127 < bits64_95);
600 if (bits128_up == 0) return {bits64_127, bits0_63};
601
602 auto shift = static_cast<unsigned>(bit_width(bits128_up));
603 uint64_t lo = (bits0_63 >> shift) + (bits64_127 << (64 - shift));
604 uint64_t hi = (bits64_127 >> shift) + (bits128_up << (64 - shift));
605 return {hi, lo};
606 }
607
608 // Compute num * 5 ^ expfive, and return the first 128 bits of the result,
609 // where the first bit is always a one. So PowFive(1, 0) starts 0b100000,
610 // PowFive(1, 1) starts 0b101000, PowFive(1, 2) starts 0b110010, etc.
PowFive(uint64_t num,int expfive)611 static std::pair<uint64_t, uint64_t> PowFive(uint64_t num, int expfive) {
612 std::pair<uint64_t, uint64_t> result = {num, 0};
613 while (expfive >= 13) {
614 // 5^13 is the highest power of five that will fit in a 32-bit integer.
615 result = Mul32(result, 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5);
616 expfive -= 13;
617 }
618 constexpr uint32_t powers_of_five[13] = {
619 1,
620 5,
621 5 * 5,
622 5 * 5 * 5,
623 5 * 5 * 5 * 5,
624 5 * 5 * 5 * 5 * 5,
625 5 * 5 * 5 * 5 * 5 * 5,
626 5 * 5 * 5 * 5 * 5 * 5 * 5,
627 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
628 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
629 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
630 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
631 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5};
632 result = Mul32(result, powers_of_five[expfive & 15]);
633 int shift = countl_zero(result.first);
634 if (shift != 0) {
635 result.first = (result.first << shift) + (result.second >> (64 - shift));
636 result.second = (result.second << shift);
637 }
638 return result;
639 }
640
641 struct ExpDigits {
642 int32_t exponent;
643 char digits[6];
644 };
645
646 // SplitToSix converts value, a positive double-precision floating-point number,
647 // into a base-10 exponent and 6 ASCII digits, where the first digit is never
648 // zero. For example, SplitToSix(1) returns an exponent of zero and a digits
649 // array of {'1', '0', '0', '0', '0', '0'}. If value is exactly halfway between
650 // two possible representations, e.g. value = 100000.5, then "round to even" is
651 // performed.
SplitToSix(const double value)652 static ExpDigits SplitToSix(const double value) {
653 ExpDigits exp_dig;
654 int exp = 5;
655 double d = value;
656 // First step: calculate a close approximation of the output, where the
657 // value d will be between 100,000 and 999,999, representing the digits
658 // in the output ASCII array, and exp is the base-10 exponent. It would be
659 // faster to use a table here, and to look up the base-2 exponent of value,
660 // however value is an IEEE-754 64-bit number, so the table would have 2,000
661 // entries, which is not cache-friendly.
662 if (d >= 999999.5) {
663 if (d >= 1e+261) exp += 256, d *= 1e-256;
664 if (d >= 1e+133) exp += 128, d *= 1e-128;
665 if (d >= 1e+69) exp += 64, d *= 1e-64;
666 if (d >= 1e+37) exp += 32, d *= 1e-32;
667 if (d >= 1e+21) exp += 16, d *= 1e-16;
668 if (d >= 1e+13) exp += 8, d *= 1e-8;
669 if (d >= 1e+9) exp += 4, d *= 1e-4;
670 if (d >= 1e+7) exp += 2, d *= 1e-2;
671 if (d >= 1e+6) exp += 1, d *= 1e-1;
672 } else {
673 if (d < 1e-250) exp -= 256, d *= 1e256;
674 if (d < 1e-122) exp -= 128, d *= 1e128;
675 if (d < 1e-58) exp -= 64, d *= 1e64;
676 if (d < 1e-26) exp -= 32, d *= 1e32;
677 if (d < 1e-10) exp -= 16, d *= 1e16;
678 if (d < 1e-2) exp -= 8, d *= 1e8;
679 if (d < 1e+2) exp -= 4, d *= 1e4;
680 if (d < 1e+4) exp -= 2, d *= 1e2;
681 if (d < 1e+5) exp -= 1, d *= 1e1;
682 }
683 // At this point, d is in the range [99999.5..999999.5) and exp is in the
684 // range [-324..308]. Since we need to round d up, we want to add a half
685 // and truncate.
686 // However, the technique above may have lost some precision, due to its
687 // repeated multiplication by constants that each may be off by half a bit
688 // of precision. This only matters if we're close to the edge though.
689 // Since we'd like to know if the fractional part of d is close to a half,
690 // we multiply it by 65536 and see if the fractional part is close to 32768.
691 // (The number doesn't have to be a power of two,but powers of two are faster)
692 uint64_t d64k = d * 65536;
693 uint32_t dddddd; // A 6-digit decimal integer.
694 if ((d64k % 65536) == 32767 || (d64k % 65536) == 32768) {
695 // OK, it's fairly likely that precision was lost above, which is
696 // not a surprise given only 52 mantissa bits are available. Therefore
697 // redo the calculation using 128-bit numbers. (64 bits are not enough).
698
699 // Start out with digits rounded down; maybe add one below.
700 dddddd = static_cast<uint32_t>(d64k / 65536);
701
702 // mantissa is a 64-bit integer representing M.mmm... * 2^63. The actual
703 // value we're representing, of course, is M.mmm... * 2^exp2.
704 int exp2;
705 double m = std::frexp(value, &exp2);
706 uint64_t mantissa = m * (32768.0 * 65536.0 * 65536.0 * 65536.0);
707 // std::frexp returns an m value in the range [0.5, 1.0), however we
708 // can't multiply it by 2^64 and convert to an integer because some FPUs
709 // throw an exception when converting an number higher than 2^63 into an
710 // integer - even an unsigned 64-bit integer! Fortunately it doesn't matter
711 // since m only has 52 significant bits anyway.
712 mantissa <<= 1;
713 exp2 -= 64; // not needed, but nice for debugging
714
715 // OK, we are here to compare:
716 // (dddddd + 0.5) * 10^(exp-5) vs. mantissa * 2^exp2
717 // so we can round up dddddd if appropriate. Those values span the full
718 // range of 600 orders of magnitude of IEE 64-bit floating-point.
719 // Fortunately, we already know they are very close, so we don't need to
720 // track the base-2 exponent of both sides. This greatly simplifies the
721 // the math since the 2^exp2 calculation is unnecessary and the power-of-10
722 // calculation can become a power-of-5 instead.
723
724 std::pair<uint64_t, uint64_t> edge, val;
725 if (exp >= 6) {
726 // Compare (dddddd + 0.5) * 5 ^ (exp - 5) to mantissa
727 // Since we're tossing powers of two, 2 * dddddd + 1 is the
728 // same as dddddd + 0.5
729 edge = PowFive(2 * dddddd + 1, exp - 5);
730
731 val.first = mantissa;
732 val.second = 0;
733 } else {
734 // We can't compare (dddddd + 0.5) * 5 ^ (exp - 5) to mantissa as we did
735 // above because (exp - 5) is negative. So we compare (dddddd + 0.5) to
736 // mantissa * 5 ^ (5 - exp)
737 edge = PowFive(2 * dddddd + 1, 0);
738
739 val = PowFive(mantissa, 5 - exp);
740 }
741 // printf("exp=%d %016lx %016lx vs %016lx %016lx\n", exp, val.first,
742 // val.second, edge.first, edge.second);
743 if (val > edge) {
744 dddddd++;
745 } else if (val == edge) {
746 dddddd += (dddddd & 1);
747 }
748 } else {
749 // Here, we are not close to the edge.
750 dddddd = static_cast<uint32_t>((d64k + 32768) / 65536);
751 }
752 if (dddddd == 1000000) {
753 dddddd = 100000;
754 exp += 1;
755 }
756 exp_dig.exponent = exp;
757
758 uint32_t two_digits = dddddd / 10000;
759 dddddd -= two_digits * 10000;
760 numbers_internal::PutTwoDigits(two_digits, &exp_dig.digits[0]);
761
762 two_digits = dddddd / 100;
763 dddddd -= two_digits * 100;
764 numbers_internal::PutTwoDigits(two_digits, &exp_dig.digits[2]);
765
766 numbers_internal::PutTwoDigits(dddddd, &exp_dig.digits[4]);
767 return exp_dig;
768 }
769
770 // Helper function for fast formatting of floating-point.
771 // The result is the same as "%g", a.k.a. "%.6g".
SixDigitsToBuffer(double d,absl::Nonnull<char * > const buffer)772 size_t numbers_internal::SixDigitsToBuffer(double d,
773 absl::Nonnull<char*> const buffer) {
774 static_assert(std::numeric_limits<float>::is_iec559,
775 "IEEE-754/IEC-559 support only");
776
777 char* out = buffer; // we write data to out, incrementing as we go, but
778 // FloatToBuffer always returns the address of the buffer
779 // passed in.
780
781 if (std::isnan(d)) {
782 strcpy(out, "nan"); // NOLINT(runtime/printf)
783 return 3;
784 }
785 if (d == 0) { // +0 and -0 are handled here
786 if (std::signbit(d)) *out++ = '-';
787 *out++ = '0';
788 *out = 0;
789 return static_cast<size_t>(out - buffer);
790 }
791 if (d < 0) {
792 *out++ = '-';
793 d = -d;
794 }
795 if (d > std::numeric_limits<double>::max()) {
796 strcpy(out, "inf"); // NOLINT(runtime/printf)
797 return static_cast<size_t>(out + 3 - buffer);
798 }
799
800 auto exp_dig = SplitToSix(d);
801 int exp = exp_dig.exponent;
802 const char* digits = exp_dig.digits;
803 out[0] = '0';
804 out[1] = '.';
805 switch (exp) {
806 case 5:
807 memcpy(out, &digits[0], 6), out += 6;
808 *out = 0;
809 return static_cast<size_t>(out - buffer);
810 case 4:
811 memcpy(out, &digits[0], 5), out += 5;
812 if (digits[5] != '0') {
813 *out++ = '.';
814 *out++ = digits[5];
815 }
816 *out = 0;
817 return static_cast<size_t>(out - buffer);
818 case 3:
819 memcpy(out, &digits[0], 4), out += 4;
820 if ((digits[5] | digits[4]) != '0') {
821 *out++ = '.';
822 *out++ = digits[4];
823 if (digits[5] != '0') *out++ = digits[5];
824 }
825 *out = 0;
826 return static_cast<size_t>(out - buffer);
827 case 2:
828 memcpy(out, &digits[0], 3), out += 3;
829 *out++ = '.';
830 memcpy(out, &digits[3], 3);
831 out += 3;
832 while (out[-1] == '0') --out;
833 if (out[-1] == '.') --out;
834 *out = 0;
835 return static_cast<size_t>(out - buffer);
836 case 1:
837 memcpy(out, &digits[0], 2), out += 2;
838 *out++ = '.';
839 memcpy(out, &digits[2], 4);
840 out += 4;
841 while (out[-1] == '0') --out;
842 if (out[-1] == '.') --out;
843 *out = 0;
844 return static_cast<size_t>(out - buffer);
845 case 0:
846 memcpy(out, &digits[0], 1), out += 1;
847 *out++ = '.';
848 memcpy(out, &digits[1], 5);
849 out += 5;
850 while (out[-1] == '0') --out;
851 if (out[-1] == '.') --out;
852 *out = 0;
853 return static_cast<size_t>(out - buffer);
854 case -4:
855 out[2] = '0';
856 ++out;
857 ABSL_FALLTHROUGH_INTENDED;
858 case -3:
859 out[2] = '0';
860 ++out;
861 ABSL_FALLTHROUGH_INTENDED;
862 case -2:
863 out[2] = '0';
864 ++out;
865 ABSL_FALLTHROUGH_INTENDED;
866 case -1:
867 out += 2;
868 memcpy(out, &digits[0], 6);
869 out += 6;
870 while (out[-1] == '0') --out;
871 *out = 0;
872 return static_cast<size_t>(out - buffer);
873 }
874 assert(exp < -4 || exp >= 6);
875 out[0] = digits[0];
876 assert(out[1] == '.');
877 out += 2;
878 memcpy(out, &digits[1], 5), out += 5;
879 while (out[-1] == '0') --out;
880 if (out[-1] == '.') --out;
881 *out++ = 'e';
882 if (exp > 0) {
883 *out++ = '+';
884 } else {
885 *out++ = '-';
886 exp = -exp;
887 }
888 if (exp > 99) {
889 int dig1 = exp / 100;
890 exp -= dig1 * 100;
891 *out++ = '0' + static_cast<char>(dig1);
892 }
893 PutTwoDigits(static_cast<uint32_t>(exp), out);
894 out += 2;
895 *out = 0;
896 return static_cast<size_t>(out - buffer);
897 }
898
899 namespace {
900 // Represents integer values of digits.
901 // Uses 36 to indicate an invalid character since we support
902 // bases up to 36.
903 static const int8_t kAsciiToInt[256] = {
904 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, // 16 36s.
905 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
906 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 0, 1, 2, 3, 4, 5,
907 6, 7, 8, 9, 36, 36, 36, 36, 36, 36, 36, 10, 11, 12, 13, 14, 15, 16, 17,
908 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
909 36, 36, 36, 36, 36, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
910 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 36, 36, 36, 36, 36, 36,
911 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
912 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
913 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
914 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
915 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
916 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
917 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36};
918
919 // Parse the sign and optional hex or oct prefix in text.
safe_parse_sign_and_base(absl::Nonnull<absl::string_view * > text,absl::Nonnull<int * > base_ptr,absl::Nonnull<bool * > negative_ptr)920 inline bool safe_parse_sign_and_base(
921 absl::Nonnull<absl::string_view*> text /*inout*/,
922 absl::Nonnull<int*> base_ptr /*inout*/,
923 absl::Nonnull<bool*> negative_ptr /*output*/) {
924 if (text->data() == nullptr) {
925 return false;
926 }
927
928 const char* start = text->data();
929 const char* end = start + text->size();
930 int base = *base_ptr;
931
932 // Consume whitespace.
933 while (start < end &&
934 absl::ascii_isspace(static_cast<unsigned char>(start[0]))) {
935 ++start;
936 }
937 while (start < end &&
938 absl::ascii_isspace(static_cast<unsigned char>(end[-1]))) {
939 --end;
940 }
941 if (start >= end) {
942 return false;
943 }
944
945 // Consume sign.
946 *negative_ptr = (start[0] == '-');
947 if (*negative_ptr || start[0] == '+') {
948 ++start;
949 if (start >= end) {
950 return false;
951 }
952 }
953
954 // Consume base-dependent prefix.
955 // base 0: "0x" -> base 16, "0" -> base 8, default -> base 10
956 // base 16: "0x" -> base 16
957 // Also validate the base.
958 if (base == 0) {
959 if (end - start >= 2 && start[0] == '0' &&
960 (start[1] == 'x' || start[1] == 'X')) {
961 base = 16;
962 start += 2;
963 if (start >= end) {
964 // "0x" with no digits after is invalid.
965 return false;
966 }
967 } else if (end - start >= 1 && start[0] == '0') {
968 base = 8;
969 start += 1;
970 } else {
971 base = 10;
972 }
973 } else if (base == 16) {
974 if (end - start >= 2 && start[0] == '0' &&
975 (start[1] == 'x' || start[1] == 'X')) {
976 start += 2;
977 if (start >= end) {
978 // "0x" with no digits after is invalid.
979 return false;
980 }
981 }
982 } else if (base >= 2 && base <= 36) {
983 // okay
984 } else {
985 return false;
986 }
987 *text = absl::string_view(start, static_cast<size_t>(end - start));
988 *base_ptr = base;
989 return true;
990 }
991
992 // Consume digits.
993 //
994 // The classic loop:
995 //
996 // for each digit
997 // value = value * base + digit
998 // value *= sign
999 //
1000 // The classic loop needs overflow checking. It also fails on the most
1001 // negative integer, -2147483648 in 32-bit two's complement representation.
1002 //
1003 // My improved loop:
1004 //
1005 // if (!negative)
1006 // for each digit
1007 // value = value * base
1008 // value = value + digit
1009 // else
1010 // for each digit
1011 // value = value * base
1012 // value = value - digit
1013 //
1014 // Overflow checking becomes simple.
1015
1016 // Lookup tables per IntType:
1017 // vmax/base and vmin/base are precomputed because division costs at least 8ns.
1018 // TODO(junyer): Doing this per base instead (i.e. an array of structs, not a
1019 // struct of arrays) would probably be better in terms of d-cache for the most
1020 // commonly used bases.
1021 template <typename IntType>
1022 struct LookupTables {
1023 ABSL_CONST_INIT static const IntType kVmaxOverBase[];
1024 ABSL_CONST_INIT static const IntType kVminOverBase[];
1025 };
1026
1027 // An array initializer macro for X/base where base in [0, 36].
1028 // However, note that lookups for base in [0, 1] should never happen because
1029 // base has been validated to be in [2, 36] by safe_parse_sign_and_base().
1030 #define X_OVER_BASE_INITIALIZER(X) \
1031 { \
1032 0, 0, X / 2, X / 3, X / 4, X / 5, X / 6, X / 7, X / 8, X / 9, X / 10, \
1033 X / 11, X / 12, X / 13, X / 14, X / 15, X / 16, X / 17, X / 18, \
1034 X / 19, X / 20, X / 21, X / 22, X / 23, X / 24, X / 25, X / 26, \
1035 X / 27, X / 28, X / 29, X / 30, X / 31, X / 32, X / 33, X / 34, \
1036 X / 35, X / 36, \
1037 }
1038
1039 // This kVmaxOverBase is generated with
1040 // for (int base = 2; base < 37; ++base) {
1041 // absl::uint128 max = std::numeric_limits<absl::uint128>::max();
1042 // auto result = max / base;
1043 // std::cout << " MakeUint128(" << absl::Uint128High64(result) << "u, "
1044 // << absl::Uint128Low64(result) << "u),\n";
1045 // }
1046 // See https://godbolt.org/z/aneYsb
1047 //
1048 // uint128& operator/=(uint128) is not constexpr, so hardcode the resulting
1049 // array to avoid a static initializer.
1050 template <>
1051 ABSL_CONST_INIT const uint128 LookupTables<uint128>::kVmaxOverBase[] = {
1052 0,
1053 0,
1054 MakeUint128(9223372036854775807u, 18446744073709551615u),
1055 MakeUint128(6148914691236517205u, 6148914691236517205u),
1056 MakeUint128(4611686018427387903u, 18446744073709551615u),
1057 MakeUint128(3689348814741910323u, 3689348814741910323u),
1058 MakeUint128(3074457345618258602u, 12297829382473034410u),
1059 MakeUint128(2635249153387078802u, 5270498306774157604u),
1060 MakeUint128(2305843009213693951u, 18446744073709551615u),
1061 MakeUint128(2049638230412172401u, 14347467612885206812u),
1062 MakeUint128(1844674407370955161u, 11068046444225730969u),
1063 MakeUint128(1676976733973595601u, 8384883669867978007u),
1064 MakeUint128(1537228672809129301u, 6148914691236517205u),
1065 MakeUint128(1418980313362273201u, 4256940940086819603u),
1066 MakeUint128(1317624576693539401u, 2635249153387078802u),
1067 MakeUint128(1229782938247303441u, 1229782938247303441u),
1068 MakeUint128(1152921504606846975u, 18446744073709551615u),
1069 MakeUint128(1085102592571150095u, 1085102592571150095u),
1070 MakeUint128(1024819115206086200u, 16397105843297379214u),
1071 MakeUint128(970881267037344821u, 16504981539634861972u),
1072 MakeUint128(922337203685477580u, 14757395258967641292u),
1073 MakeUint128(878416384462359600u, 14054662151397753612u),
1074 MakeUint128(838488366986797800u, 13415813871788764811u),
1075 MakeUint128(802032351030850070u, 4812194106185100421u),
1076 MakeUint128(768614336404564650u, 12297829382473034410u),
1077 MakeUint128(737869762948382064u, 11805916207174113034u),
1078 MakeUint128(709490156681136600u, 11351842506898185609u),
1079 MakeUint128(683212743470724133u, 17080318586768103348u),
1080 MakeUint128(658812288346769700u, 10540996613548315209u),
1081 MakeUint128(636094623231363848u, 15266270957552732371u),
1082 MakeUint128(614891469123651720u, 9838263505978427528u),
1083 MakeUint128(595056260442243600u, 9520900167075897608u),
1084 MakeUint128(576460752303423487u, 18446744073709551615u),
1085 MakeUint128(558992244657865200u, 8943875914525843207u),
1086 MakeUint128(542551296285575047u, 9765923333140350855u),
1087 MakeUint128(527049830677415760u, 8432797290838652167u),
1088 MakeUint128(512409557603043100u, 8198552921648689607u),
1089 };
1090
1091 // This kVmaxOverBase generated with
1092 // for (int base = 2; base < 37; ++base) {
1093 // absl::int128 max = std::numeric_limits<absl::int128>::max();
1094 // auto result = max / base;
1095 // std::cout << "\tMakeInt128(" << absl::Int128High64(result) << ", "
1096 // << absl::Int128Low64(result) << "u),\n";
1097 // }
1098 // See https://godbolt.org/z/7djYWz
1099 //
1100 // int128& operator/=(int128) is not constexpr, so hardcode the resulting array
1101 // to avoid a static initializer.
1102 template <>
1103 ABSL_CONST_INIT const int128 LookupTables<int128>::kVmaxOverBase[] = {
1104 0,
1105 0,
1106 MakeInt128(4611686018427387903, 18446744073709551615u),
1107 MakeInt128(3074457345618258602, 12297829382473034410u),
1108 MakeInt128(2305843009213693951, 18446744073709551615u),
1109 MakeInt128(1844674407370955161, 11068046444225730969u),
1110 MakeInt128(1537228672809129301, 6148914691236517205u),
1111 MakeInt128(1317624576693539401, 2635249153387078802u),
1112 MakeInt128(1152921504606846975, 18446744073709551615u),
1113 MakeInt128(1024819115206086200, 16397105843297379214u),
1114 MakeInt128(922337203685477580, 14757395258967641292u),
1115 MakeInt128(838488366986797800, 13415813871788764811u),
1116 MakeInt128(768614336404564650, 12297829382473034410u),
1117 MakeInt128(709490156681136600, 11351842506898185609u),
1118 MakeInt128(658812288346769700, 10540996613548315209u),
1119 MakeInt128(614891469123651720, 9838263505978427528u),
1120 MakeInt128(576460752303423487, 18446744073709551615u),
1121 MakeInt128(542551296285575047, 9765923333140350855u),
1122 MakeInt128(512409557603043100, 8198552921648689607u),
1123 MakeInt128(485440633518672410, 17475862806672206794u),
1124 MakeInt128(461168601842738790, 7378697629483820646u),
1125 MakeInt128(439208192231179800, 7027331075698876806u),
1126 MakeInt128(419244183493398900, 6707906935894382405u),
1127 MakeInt128(401016175515425035, 2406097053092550210u),
1128 MakeInt128(384307168202282325, 6148914691236517205u),
1129 MakeInt128(368934881474191032, 5902958103587056517u),
1130 MakeInt128(354745078340568300, 5675921253449092804u),
1131 MakeInt128(341606371735362066, 17763531330238827482u),
1132 MakeInt128(329406144173384850, 5270498306774157604u),
1133 MakeInt128(318047311615681924, 7633135478776366185u),
1134 MakeInt128(307445734561825860, 4919131752989213764u),
1135 MakeInt128(297528130221121800, 4760450083537948804u),
1136 MakeInt128(288230376151711743, 18446744073709551615u),
1137 MakeInt128(279496122328932600, 4471937957262921603u),
1138 MakeInt128(271275648142787523, 14106333703424951235u),
1139 MakeInt128(263524915338707880, 4216398645419326083u),
1140 MakeInt128(256204778801521550, 4099276460824344803u),
1141 };
1142
1143 // This kVminOverBase generated with
1144 // for (int base = 2; base < 37; ++base) {
1145 // absl::int128 min = std::numeric_limits<absl::int128>::min();
1146 // auto result = min / base;
1147 // std::cout << "\tMakeInt128(" << absl::Int128High64(result) << ", "
1148 // << absl::Int128Low64(result) << "u),\n";
1149 // }
1150 //
1151 // See https://godbolt.org/z/7djYWz
1152 //
1153 // int128& operator/=(int128) is not constexpr, so hardcode the resulting array
1154 // to avoid a static initializer.
1155 template <>
1156 ABSL_CONST_INIT const int128 LookupTables<int128>::kVminOverBase[] = {
1157 0,
1158 0,
1159 MakeInt128(-4611686018427387904, 0u),
1160 MakeInt128(-3074457345618258603, 6148914691236517206u),
1161 MakeInt128(-2305843009213693952, 0u),
1162 MakeInt128(-1844674407370955162, 7378697629483820647u),
1163 MakeInt128(-1537228672809129302, 12297829382473034411u),
1164 MakeInt128(-1317624576693539402, 15811494920322472814u),
1165 MakeInt128(-1152921504606846976, 0u),
1166 MakeInt128(-1024819115206086201, 2049638230412172402u),
1167 MakeInt128(-922337203685477581, 3689348814741910324u),
1168 MakeInt128(-838488366986797801, 5030930201920786805u),
1169 MakeInt128(-768614336404564651, 6148914691236517206u),
1170 MakeInt128(-709490156681136601, 7094901566811366007u),
1171 MakeInt128(-658812288346769701, 7905747460161236407u),
1172 MakeInt128(-614891469123651721, 8608480567731124088u),
1173 MakeInt128(-576460752303423488, 0u),
1174 MakeInt128(-542551296285575048, 8680820740569200761u),
1175 MakeInt128(-512409557603043101, 10248191152060862009u),
1176 MakeInt128(-485440633518672411, 970881267037344822u),
1177 MakeInt128(-461168601842738791, 11068046444225730970u),
1178 MakeInt128(-439208192231179801, 11419412998010674810u),
1179 MakeInt128(-419244183493398901, 11738837137815169211u),
1180 MakeInt128(-401016175515425036, 16040647020617001406u),
1181 MakeInt128(-384307168202282326, 12297829382473034411u),
1182 MakeInt128(-368934881474191033, 12543785970122495099u),
1183 MakeInt128(-354745078340568301, 12770822820260458812u),
1184 MakeInt128(-341606371735362067, 683212743470724134u),
1185 MakeInt128(-329406144173384851, 13176245766935394012u),
1186 MakeInt128(-318047311615681925, 10813608594933185431u),
1187 MakeInt128(-307445734561825861, 13527612320720337852u),
1188 MakeInt128(-297528130221121801, 13686293990171602812u),
1189 MakeInt128(-288230376151711744, 0u),
1190 MakeInt128(-279496122328932601, 13974806116446630013u),
1191 MakeInt128(-271275648142787524, 4340410370284600381u),
1192 MakeInt128(-263524915338707881, 14230345428290225533u),
1193 MakeInt128(-256204778801521551, 14347467612885206813u),
1194 };
1195
1196 template <typename IntType>
1197 ABSL_CONST_INIT const IntType LookupTables<IntType>::kVmaxOverBase[] =
1198 X_OVER_BASE_INITIALIZER(std::numeric_limits<IntType>::max());
1199
1200 template <typename IntType>
1201 ABSL_CONST_INIT const IntType LookupTables<IntType>::kVminOverBase[] =
1202 X_OVER_BASE_INITIALIZER(std::numeric_limits<IntType>::min());
1203
1204 #undef X_OVER_BASE_INITIALIZER
1205
1206 template <typename IntType>
safe_parse_positive_int(absl::string_view text,int base,absl::Nonnull<IntType * > value_p)1207 inline bool safe_parse_positive_int(absl::string_view text, int base,
1208 absl::Nonnull<IntType*> value_p) {
1209 IntType value = 0;
1210 const IntType vmax = std::numeric_limits<IntType>::max();
1211 assert(vmax > 0);
1212 assert(base >= 0);
1213 const IntType base_inttype = static_cast<IntType>(base);
1214 assert(vmax >= base_inttype);
1215 const IntType vmax_over_base = LookupTables<IntType>::kVmaxOverBase[base];
1216 assert(base < 2 ||
1217 std::numeric_limits<IntType>::max() / base_inttype == vmax_over_base);
1218 const char* start = text.data();
1219 const char* end = start + text.size();
1220 // loop over digits
1221 for (; start < end; ++start) {
1222 unsigned char c = static_cast<unsigned char>(start[0]);
1223 IntType digit = static_cast<IntType>(kAsciiToInt[c]);
1224 if (digit >= base_inttype) {
1225 *value_p = value;
1226 return false;
1227 }
1228 if (value > vmax_over_base) {
1229 *value_p = vmax;
1230 return false;
1231 }
1232 value *= base_inttype;
1233 if (value > vmax - digit) {
1234 *value_p = vmax;
1235 return false;
1236 }
1237 value += digit;
1238 }
1239 *value_p = value;
1240 return true;
1241 }
1242
1243 template <typename IntType>
safe_parse_negative_int(absl::string_view text,int base,absl::Nonnull<IntType * > value_p)1244 inline bool safe_parse_negative_int(absl::string_view text, int base,
1245 absl::Nonnull<IntType*> value_p) {
1246 IntType value = 0;
1247 const IntType vmin = std::numeric_limits<IntType>::min();
1248 assert(vmin < 0);
1249 assert(vmin <= 0 - base);
1250 IntType vmin_over_base = LookupTables<IntType>::kVminOverBase[base];
1251 assert(base < 2 ||
1252 std::numeric_limits<IntType>::min() / base == vmin_over_base);
1253 // 2003 c++ standard [expr.mul]
1254 // "... the sign of the remainder is implementation-defined."
1255 // Although (vmin/base)*base + vmin%base is always vmin.
1256 // 2011 c++ standard tightens the spec but we cannot rely on it.
1257 // TODO(junyer): Handle this in the lookup table generation.
1258 if (vmin % base > 0) {
1259 vmin_over_base += 1;
1260 }
1261 const char* start = text.data();
1262 const char* end = start + text.size();
1263 // loop over digits
1264 for (; start < end; ++start) {
1265 unsigned char c = static_cast<unsigned char>(start[0]);
1266 int digit = kAsciiToInt[c];
1267 if (digit >= base) {
1268 *value_p = value;
1269 return false;
1270 }
1271 if (value < vmin_over_base) {
1272 *value_p = vmin;
1273 return false;
1274 }
1275 value *= base;
1276 if (value < vmin + digit) {
1277 *value_p = vmin;
1278 return false;
1279 }
1280 value -= digit;
1281 }
1282 *value_p = value;
1283 return true;
1284 }
1285
1286 // Input format based on POSIX.1-2008 strtol
1287 // http://pubs.opengroup.org/onlinepubs/9699919799/functions/strtol.html
1288 template <typename IntType>
safe_int_internal(absl::string_view text,absl::Nonnull<IntType * > value_p,int base)1289 inline bool safe_int_internal(absl::string_view text,
1290 absl::Nonnull<IntType*> value_p, int base) {
1291 *value_p = 0;
1292 bool negative;
1293 if (!safe_parse_sign_and_base(&text, &base, &negative)) {
1294 return false;
1295 }
1296 if (!negative) {
1297 return safe_parse_positive_int(text, base, value_p);
1298 } else {
1299 return safe_parse_negative_int(text, base, value_p);
1300 }
1301 }
1302
1303 template <typename IntType>
safe_uint_internal(absl::string_view text,absl::Nonnull<IntType * > value_p,int base)1304 inline bool safe_uint_internal(absl::string_view text,
1305 absl::Nonnull<IntType*> value_p, int base) {
1306 *value_p = 0;
1307 bool negative;
1308 if (!safe_parse_sign_and_base(&text, &base, &negative) || negative) {
1309 return false;
1310 }
1311 return safe_parse_positive_int(text, base, value_p);
1312 }
1313 } // anonymous namespace
1314
1315 namespace numbers_internal {
1316
1317 // Digit conversion.
1318 ABSL_CONST_INIT ABSL_DLL const char kHexChar[] =
1319 "0123456789abcdef";
1320
1321 ABSL_CONST_INIT ABSL_DLL const char kHexTable[513] =
1322 "000102030405060708090a0b0c0d0e0f"
1323 "101112131415161718191a1b1c1d1e1f"
1324 "202122232425262728292a2b2c2d2e2f"
1325 "303132333435363738393a3b3c3d3e3f"
1326 "404142434445464748494a4b4c4d4e4f"
1327 "505152535455565758595a5b5c5d5e5f"
1328 "606162636465666768696a6b6c6d6e6f"
1329 "707172737475767778797a7b7c7d7e7f"
1330 "808182838485868788898a8b8c8d8e8f"
1331 "909192939495969798999a9b9c9d9e9f"
1332 "a0a1a2a3a4a5a6a7a8a9aaabacadaeaf"
1333 "b0b1b2b3b4b5b6b7b8b9babbbcbdbebf"
1334 "c0c1c2c3c4c5c6c7c8c9cacbcccdcecf"
1335 "d0d1d2d3d4d5d6d7d8d9dadbdcdddedf"
1336 "e0e1e2e3e4e5e6e7e8e9eaebecedeeef"
1337 "f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff";
1338
safe_strto32_base(absl::string_view text,absl::Nonnull<int32_t * > value,int base)1339 bool safe_strto32_base(absl::string_view text, absl::Nonnull<int32_t*> value,
1340 int base) {
1341 return safe_int_internal<int32_t>(text, value, base);
1342 }
1343
safe_strto64_base(absl::string_view text,absl::Nonnull<int64_t * > value,int base)1344 bool safe_strto64_base(absl::string_view text, absl::Nonnull<int64_t*> value,
1345 int base) {
1346 return safe_int_internal<int64_t>(text, value, base);
1347 }
1348
safe_strto128_base(absl::string_view text,absl::Nonnull<int128 * > value,int base)1349 bool safe_strto128_base(absl::string_view text, absl::Nonnull<int128*> value,
1350 int base) {
1351 return safe_int_internal<absl::int128>(text, value, base);
1352 }
1353
safe_strtou32_base(absl::string_view text,absl::Nonnull<uint32_t * > value,int base)1354 bool safe_strtou32_base(absl::string_view text, absl::Nonnull<uint32_t*> value,
1355 int base) {
1356 return safe_uint_internal<uint32_t>(text, value, base);
1357 }
1358
safe_strtou64_base(absl::string_view text,absl::Nonnull<uint64_t * > value,int base)1359 bool safe_strtou64_base(absl::string_view text, absl::Nonnull<uint64_t*> value,
1360 int base) {
1361 return safe_uint_internal<uint64_t>(text, value, base);
1362 }
1363
safe_strtou128_base(absl::string_view text,absl::Nonnull<uint128 * > value,int base)1364 bool safe_strtou128_base(absl::string_view text, absl::Nonnull<uint128*> value,
1365 int base) {
1366 return safe_uint_internal<absl::uint128>(text, value, base);
1367 }
1368
1369 } // namespace numbers_internal
1370 ABSL_NAMESPACE_END
1371 } // namespace absl
1372