• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2017 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 // This file contains string processing functions related to
16 // numeric values.
17 
18 #include "absl/strings/numbers.h"
19 
20 #include <algorithm>
21 #include <cassert>
22 #include <cfloat>  // for DBL_DIG and FLT_DIG
23 #include <cmath>   // for HUGE_VAL
24 #include <cstdint>
25 #include <cstdio>
26 #include <cstdlib>
27 #include <cstring>
28 #include <iterator>
29 #include <limits>
30 #include <memory>
31 #include <utility>
32 
33 #include "absl/base/attributes.h"
34 #include "absl/base/internal/bits.h"
35 #include "absl/base/internal/raw_logging.h"
36 #include "absl/strings/ascii.h"
37 #include "absl/strings/charconv.h"
38 #include "absl/strings/escaping.h"
39 #include "absl/strings/internal/memutil.h"
40 #include "absl/strings/match.h"
41 #include "absl/strings/str_cat.h"
42 
43 namespace absl {
44 ABSL_NAMESPACE_BEGIN
45 
SimpleAtof(absl::string_view str,float * out)46 bool SimpleAtof(absl::string_view str, float* out) {
47   *out = 0.0;
48   str = StripAsciiWhitespace(str);
49   if (!str.empty() && str[0] == '+') {
50     str.remove_prefix(1);
51   }
52   auto result = absl::from_chars(str.data(), str.data() + str.size(), *out);
53   if (result.ec == std::errc::invalid_argument) {
54     return false;
55   }
56   if (result.ptr != str.data() + str.size()) {
57     // not all non-whitespace characters consumed
58     return false;
59   }
60   // from_chars() with DR 3081's current wording will return max() on
61   // overflow.  SimpleAtof returns infinity instead.
62   if (result.ec == std::errc::result_out_of_range) {
63     if (*out > 1.0) {
64       *out = std::numeric_limits<float>::infinity();
65     } else if (*out < -1.0) {
66       *out = -std::numeric_limits<float>::infinity();
67     }
68   }
69   return true;
70 }
71 
SimpleAtod(absl::string_view str,double * out)72 bool SimpleAtod(absl::string_view str, double* out) {
73   *out = 0.0;
74   str = StripAsciiWhitespace(str);
75   if (!str.empty() && str[0] == '+') {
76     str.remove_prefix(1);
77   }
78   auto result = absl::from_chars(str.data(), str.data() + str.size(), *out);
79   if (result.ec == std::errc::invalid_argument) {
80     return false;
81   }
82   if (result.ptr != str.data() + str.size()) {
83     // not all non-whitespace characters consumed
84     return false;
85   }
86   // from_chars() with DR 3081's current wording will return max() on
87   // overflow.  SimpleAtod returns infinity instead.
88   if (result.ec == std::errc::result_out_of_range) {
89     if (*out > 1.0) {
90       *out = std::numeric_limits<double>::infinity();
91     } else if (*out < -1.0) {
92       *out = -std::numeric_limits<double>::infinity();
93     }
94   }
95   return true;
96 }
97 
SimpleAtob(absl::string_view str,bool * out)98 bool SimpleAtob(absl::string_view str, bool* out) {
99   ABSL_RAW_CHECK(out != nullptr, "Output pointer must not be nullptr.");
100   if (EqualsIgnoreCase(str, "true") || EqualsIgnoreCase(str, "t") ||
101       EqualsIgnoreCase(str, "yes") || EqualsIgnoreCase(str, "y") ||
102       EqualsIgnoreCase(str, "1")) {
103     *out = true;
104     return true;
105   }
106   if (EqualsIgnoreCase(str, "false") || EqualsIgnoreCase(str, "f") ||
107       EqualsIgnoreCase(str, "no") || EqualsIgnoreCase(str, "n") ||
108       EqualsIgnoreCase(str, "0")) {
109     *out = false;
110     return true;
111   }
112   return false;
113 }
114 
115 // ----------------------------------------------------------------------
116 // FastIntToBuffer() overloads
117 //
118 // Like the Fast*ToBuffer() functions above, these are intended for speed.
119 // Unlike the Fast*ToBuffer() functions, however, these functions write
120 // their output to the beginning of the buffer.  The caller is responsible
121 // for ensuring that the buffer has enough space to hold the output.
122 //
123 // Returns a pointer to the end of the string (i.e. the null character
124 // terminating the string).
125 // ----------------------------------------------------------------------
126 
127 namespace {
128 
129 // Used to optimize printing a decimal number's final digit.
130 const char one_ASCII_final_digits[10][2] {
131   {'0', 0}, {'1', 0}, {'2', 0}, {'3', 0}, {'4', 0},
132   {'5', 0}, {'6', 0}, {'7', 0}, {'8', 0}, {'9', 0},
133 };
134 
135 }  // namespace
136 
FastIntToBuffer(uint32_t i,char * buffer)137 char* numbers_internal::FastIntToBuffer(uint32_t i, char* buffer) {
138   uint32_t digits;
139   // The idea of this implementation is to trim the number of divides to as few
140   // as possible, and also reducing memory stores and branches, by going in
141   // steps of two digits at a time rather than one whenever possible.
142   // The huge-number case is first, in the hopes that the compiler will output
143   // that case in one branch-free block of code, and only output conditional
144   // branches into it from below.
145   if (i >= 1000000000) {     // >= 1,000,000,000
146     digits = i / 100000000;  //      100,000,000
147     i -= digits * 100000000;
148     PutTwoDigits(digits, buffer);
149     buffer += 2;
150   lt100_000_000:
151     digits = i / 1000000;  // 1,000,000
152     i -= digits * 1000000;
153     PutTwoDigits(digits, buffer);
154     buffer += 2;
155   lt1_000_000:
156     digits = i / 10000;  // 10,000
157     i -= digits * 10000;
158     PutTwoDigits(digits, buffer);
159     buffer += 2;
160   lt10_000:
161     digits = i / 100;
162     i -= digits * 100;
163     PutTwoDigits(digits, buffer);
164     buffer += 2;
165  lt100:
166     digits = i;
167     PutTwoDigits(digits, buffer);
168     buffer += 2;
169     *buffer = 0;
170     return buffer;
171   }
172 
173   if (i < 100) {
174     digits = i;
175     if (i >= 10) goto lt100;
176     memcpy(buffer, one_ASCII_final_digits[i], 2);
177     return buffer + 1;
178   }
179   if (i < 10000) {  //    10,000
180     if (i >= 1000) goto lt10_000;
181     digits = i / 100;
182     i -= digits * 100;
183     *buffer++ = '0' + digits;
184     goto lt100;
185   }
186   if (i < 1000000) {  //    1,000,000
187     if (i >= 100000) goto lt1_000_000;
188     digits = i / 10000;  //    10,000
189     i -= digits * 10000;
190     *buffer++ = '0' + digits;
191     goto lt10_000;
192   }
193   if (i < 100000000) {  //    100,000,000
194     if (i >= 10000000) goto lt100_000_000;
195     digits = i / 1000000;  //   1,000,000
196     i -= digits * 1000000;
197     *buffer++ = '0' + digits;
198     goto lt1_000_000;
199   }
200   // we already know that i < 1,000,000,000
201   digits = i / 100000000;  //   100,000,000
202   i -= digits * 100000000;
203   *buffer++ = '0' + digits;
204   goto lt100_000_000;
205 }
206 
FastIntToBuffer(int32_t i,char * buffer)207 char* numbers_internal::FastIntToBuffer(int32_t i, char* buffer) {
208   uint32_t u = i;
209   if (i < 0) {
210     *buffer++ = '-';
211     // We need to do the negation in modular (i.e., "unsigned")
212     // arithmetic; MSVC++ apprently warns for plain "-u", so
213     // we write the equivalent expression "0 - u" instead.
214     u = 0 - u;
215   }
216   return numbers_internal::FastIntToBuffer(u, buffer);
217 }
218 
FastIntToBuffer(uint64_t i,char * buffer)219 char* numbers_internal::FastIntToBuffer(uint64_t i, char* buffer) {
220   uint32_t u32 = static_cast<uint32_t>(i);
221   if (u32 == i) return numbers_internal::FastIntToBuffer(u32, buffer);
222 
223   // Here we know i has at least 10 decimal digits.
224   uint64_t top_1to11 = i / 1000000000;
225   u32 = static_cast<uint32_t>(i - top_1to11 * 1000000000);
226   uint32_t top_1to11_32 = static_cast<uint32_t>(top_1to11);
227 
228   if (top_1to11_32 == top_1to11) {
229     buffer = numbers_internal::FastIntToBuffer(top_1to11_32, buffer);
230   } else {
231     // top_1to11 has more than 32 bits too; print it in two steps.
232     uint32_t top_8to9 = static_cast<uint32_t>(top_1to11 / 100);
233     uint32_t mid_2 = static_cast<uint32_t>(top_1to11 - top_8to9 * 100);
234     buffer = numbers_internal::FastIntToBuffer(top_8to9, buffer);
235     PutTwoDigits(mid_2, buffer);
236     buffer += 2;
237   }
238 
239   // We have only 9 digits now, again the maximum uint32_t can handle fully.
240   uint32_t digits = u32 / 10000000;  // 10,000,000
241   u32 -= digits * 10000000;
242   PutTwoDigits(digits, buffer);
243   buffer += 2;
244   digits = u32 / 100000;  // 100,000
245   u32 -= digits * 100000;
246   PutTwoDigits(digits, buffer);
247   buffer += 2;
248   digits = u32 / 1000;  // 1,000
249   u32 -= digits * 1000;
250   PutTwoDigits(digits, buffer);
251   buffer += 2;
252   digits = u32 / 10;
253   u32 -= digits * 10;
254   PutTwoDigits(digits, buffer);
255   buffer += 2;
256   memcpy(buffer, one_ASCII_final_digits[u32], 2);
257   return buffer + 1;
258 }
259 
FastIntToBuffer(int64_t i,char * buffer)260 char* numbers_internal::FastIntToBuffer(int64_t i, char* buffer) {
261   uint64_t u = i;
262   if (i < 0) {
263     *buffer++ = '-';
264     u = 0 - u;
265   }
266   return numbers_internal::FastIntToBuffer(u, buffer);
267 }
268 
269 // Given a 128-bit number expressed as a pair of uint64_t, high half first,
270 // return that number multiplied by the given 32-bit value.  If the result is
271 // too large to fit in a 128-bit number, divide it by 2 until it fits.
Mul32(std::pair<uint64_t,uint64_t> num,uint32_t mul)272 static std::pair<uint64_t, uint64_t> Mul32(std::pair<uint64_t, uint64_t> num,
273                                            uint32_t mul) {
274   uint64_t bits0_31 = num.second & 0xFFFFFFFF;
275   uint64_t bits32_63 = num.second >> 32;
276   uint64_t bits64_95 = num.first & 0xFFFFFFFF;
277   uint64_t bits96_127 = num.first >> 32;
278 
279   // The picture so far: each of these 64-bit values has only the lower 32 bits
280   // filled in.
281   // bits96_127:          [ 00000000 xxxxxxxx ]
282   // bits64_95:                    [ 00000000 xxxxxxxx ]
283   // bits32_63:                             [ 00000000 xxxxxxxx ]
284   // bits0_31:                                       [ 00000000 xxxxxxxx ]
285 
286   bits0_31 *= mul;
287   bits32_63 *= mul;
288   bits64_95 *= mul;
289   bits96_127 *= mul;
290 
291   // Now the top halves may also have value, though all 64 of their bits will
292   // never be set at the same time, since they are a result of a 32x32 bit
293   // multiply.  This makes the carry calculation slightly easier.
294   // bits96_127:          [ mmmmmmmm | mmmmmmmm ]
295   // bits64_95:                    [ | mmmmmmmm mmmmmmmm | ]
296   // bits32_63:                      |        [ mmmmmmmm | mmmmmmmm ]
297   // bits0_31:                       |                 [ | mmmmmmmm mmmmmmmm ]
298   // eventually:        [ bits128_up | ...bits64_127.... | ..bits0_63... ]
299 
300   uint64_t bits0_63 = bits0_31 + (bits32_63 << 32);
301   uint64_t bits64_127 = bits64_95 + (bits96_127 << 32) + (bits32_63 >> 32) +
302                         (bits0_63 < bits0_31);
303   uint64_t bits128_up = (bits96_127 >> 32) + (bits64_127 < bits64_95);
304   if (bits128_up == 0) return {bits64_127, bits0_63};
305 
306   int shift = 64 - base_internal::CountLeadingZeros64(bits128_up);
307   uint64_t lo = (bits0_63 >> shift) + (bits64_127 << (64 - shift));
308   uint64_t hi = (bits64_127 >> shift) + (bits128_up << (64 - shift));
309   return {hi, lo};
310 }
311 
312 // Compute num * 5 ^ expfive, and return the first 128 bits of the result,
313 // where the first bit is always a one.  So PowFive(1, 0) starts 0b100000,
314 // PowFive(1, 1) starts 0b101000, PowFive(1, 2) starts 0b110010, etc.
PowFive(uint64_t num,int expfive)315 static std::pair<uint64_t, uint64_t> PowFive(uint64_t num, int expfive) {
316   std::pair<uint64_t, uint64_t> result = {num, 0};
317   while (expfive >= 13) {
318     // 5^13 is the highest power of five that will fit in a 32-bit integer.
319     result = Mul32(result, 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5);
320     expfive -= 13;
321   }
322   constexpr int powers_of_five[13] = {
323       1,
324       5,
325       5 * 5,
326       5 * 5 * 5,
327       5 * 5 * 5 * 5,
328       5 * 5 * 5 * 5 * 5,
329       5 * 5 * 5 * 5 * 5 * 5,
330       5 * 5 * 5 * 5 * 5 * 5 * 5,
331       5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
332       5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
333       5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
334       5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
335       5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5};
336   result = Mul32(result, powers_of_five[expfive & 15]);
337   int shift = base_internal::CountLeadingZeros64(result.first);
338   if (shift != 0) {
339     result.first = (result.first << shift) + (result.second >> (64 - shift));
340     result.second = (result.second << shift);
341   }
342   return result;
343 }
344 
345 struct ExpDigits {
346   int32_t exponent;
347   char digits[6];
348 };
349 
350 // SplitToSix converts value, a positive double-precision floating-point number,
351 // into a base-10 exponent and 6 ASCII digits, where the first digit is never
352 // zero.  For example, SplitToSix(1) returns an exponent of zero and a digits
353 // array of {'1', '0', '0', '0', '0', '0'}.  If value is exactly halfway between
354 // two possible representations, e.g. value = 100000.5, then "round to even" is
355 // performed.
SplitToSix(const double value)356 static ExpDigits SplitToSix(const double value) {
357   ExpDigits exp_dig;
358   int exp = 5;
359   double d = value;
360   // First step: calculate a close approximation of the output, where the
361   // value d will be between 100,000 and 999,999, representing the digits
362   // in the output ASCII array, and exp is the base-10 exponent.  It would be
363   // faster to use a table here, and to look up the base-2 exponent of value,
364   // however value is an IEEE-754 64-bit number, so the table would have 2,000
365   // entries, which is not cache-friendly.
366   if (d >= 999999.5) {
367     if (d >= 1e+261) exp += 256, d *= 1e-256;
368     if (d >= 1e+133) exp += 128, d *= 1e-128;
369     if (d >= 1e+69) exp += 64, d *= 1e-64;
370     if (d >= 1e+37) exp += 32, d *= 1e-32;
371     if (d >= 1e+21) exp += 16, d *= 1e-16;
372     if (d >= 1e+13) exp += 8, d *= 1e-8;
373     if (d >= 1e+9) exp += 4, d *= 1e-4;
374     if (d >= 1e+7) exp += 2, d *= 1e-2;
375     if (d >= 1e+6) exp += 1, d *= 1e-1;
376   } else {
377     if (d < 1e-250) exp -= 256, d *= 1e256;
378     if (d < 1e-122) exp -= 128, d *= 1e128;
379     if (d < 1e-58) exp -= 64, d *= 1e64;
380     if (d < 1e-26) exp -= 32, d *= 1e32;
381     if (d < 1e-10) exp -= 16, d *= 1e16;
382     if (d < 1e-2) exp -= 8, d *= 1e8;
383     if (d < 1e+2) exp -= 4, d *= 1e4;
384     if (d < 1e+4) exp -= 2, d *= 1e2;
385     if (d < 1e+5) exp -= 1, d *= 1e1;
386   }
387   // At this point, d is in the range [99999.5..999999.5) and exp is in the
388   // range [-324..308]. Since we need to round d up, we want to add a half
389   // and truncate.
390   // However, the technique above may have lost some precision, due to its
391   // repeated multiplication by constants that each may be off by half a bit
392   // of precision.  This only matters if we're close to the edge though.
393   // Since we'd like to know if the fractional part of d is close to a half,
394   // we multiply it by 65536 and see if the fractional part is close to 32768.
395   // (The number doesn't have to be a power of two,but powers of two are faster)
396   uint64_t d64k = d * 65536;
397   int dddddd;  // A 6-digit decimal integer.
398   if ((d64k % 65536) == 32767 || (d64k % 65536) == 32768) {
399     // OK, it's fairly likely that precision was lost above, which is
400     // not a surprise given only 52 mantissa bits are available.  Therefore
401     // redo the calculation using 128-bit numbers.  (64 bits are not enough).
402 
403     // Start out with digits rounded down; maybe add one below.
404     dddddd = static_cast<int>(d64k / 65536);
405 
406     // mantissa is a 64-bit integer representing M.mmm... * 2^63.  The actual
407     // value we're representing, of course, is M.mmm... * 2^exp2.
408     int exp2;
409     double m = std::frexp(value, &exp2);
410     uint64_t mantissa = m * (32768.0 * 65536.0 * 65536.0 * 65536.0);
411     // std::frexp returns an m value in the range [0.5, 1.0), however we
412     // can't multiply it by 2^64 and convert to an integer because some FPUs
413     // throw an exception when converting an number higher than 2^63 into an
414     // integer - even an unsigned 64-bit integer!  Fortunately it doesn't matter
415     // since m only has 52 significant bits anyway.
416     mantissa <<= 1;
417     exp2 -= 64;  // not needed, but nice for debugging
418 
419     // OK, we are here to compare:
420     //     (dddddd + 0.5) * 10^(exp-5)  vs.  mantissa * 2^exp2
421     // so we can round up dddddd if appropriate.  Those values span the full
422     // range of 600 orders of magnitude of IEE 64-bit floating-point.
423     // Fortunately, we already know they are very close, so we don't need to
424     // track the base-2 exponent of both sides.  This greatly simplifies the
425     // the math since the 2^exp2 calculation is unnecessary and the power-of-10
426     // calculation can become a power-of-5 instead.
427 
428     std::pair<uint64_t, uint64_t> edge, val;
429     if (exp >= 6) {
430       // Compare (dddddd + 0.5) * 5 ^ (exp - 5) to mantissa
431       // Since we're tossing powers of two, 2 * dddddd + 1 is the
432       // same as dddddd + 0.5
433       edge = PowFive(2 * dddddd + 1, exp - 5);
434 
435       val.first = mantissa;
436       val.second = 0;
437     } else {
438       // We can't compare (dddddd + 0.5) * 5 ^ (exp - 5) to mantissa as we did
439       // above because (exp - 5) is negative.  So we compare (dddddd + 0.5) to
440       // mantissa * 5 ^ (5 - exp)
441       edge = PowFive(2 * dddddd + 1, 0);
442 
443       val = PowFive(mantissa, 5 - exp);
444     }
445     // printf("exp=%d %016lx %016lx vs %016lx %016lx\n", exp, val.first,
446     //        val.second, edge.first, edge.second);
447     if (val > edge) {
448       dddddd++;
449     } else if (val == edge) {
450       dddddd += (dddddd & 1);
451     }
452   } else {
453     // Here, we are not close to the edge.
454     dddddd = static_cast<int>((d64k + 32768) / 65536);
455   }
456   if (dddddd == 1000000) {
457     dddddd = 100000;
458     exp += 1;
459   }
460   exp_dig.exponent = exp;
461 
462   int two_digits = dddddd / 10000;
463   dddddd -= two_digits * 10000;
464   numbers_internal::PutTwoDigits(two_digits, &exp_dig.digits[0]);
465 
466   two_digits = dddddd / 100;
467   dddddd -= two_digits * 100;
468   numbers_internal::PutTwoDigits(two_digits, &exp_dig.digits[2]);
469 
470   numbers_internal::PutTwoDigits(dddddd, &exp_dig.digits[4]);
471   return exp_dig;
472 }
473 
474 // Helper function for fast formatting of floating-point.
475 // The result is the same as "%g", a.k.a. "%.6g".
SixDigitsToBuffer(double d,char * const buffer)476 size_t numbers_internal::SixDigitsToBuffer(double d, char* const buffer) {
477   static_assert(std::numeric_limits<float>::is_iec559,
478                 "IEEE-754/IEC-559 support only");
479 
480   char* out = buffer;  // we write data to out, incrementing as we go, but
481                        // FloatToBuffer always returns the address of the buffer
482                        // passed in.
483 
484   if (std::isnan(d)) {
485     strcpy(out, "nan");  // NOLINT(runtime/printf)
486     return 3;
487   }
488   if (d == 0) {  // +0 and -0 are handled here
489     if (std::signbit(d)) *out++ = '-';
490     *out++ = '0';
491     *out = 0;
492     return out - buffer;
493   }
494   if (d < 0) {
495     *out++ = '-';
496     d = -d;
497   }
498   if (std::isinf(d)) {
499     strcpy(out, "inf");  // NOLINT(runtime/printf)
500     return out + 3 - buffer;
501   }
502 
503   auto exp_dig = SplitToSix(d);
504   int exp = exp_dig.exponent;
505   const char* digits = exp_dig.digits;
506   out[0] = '0';
507   out[1] = '.';
508   switch (exp) {
509     case 5:
510       memcpy(out, &digits[0], 6), out += 6;
511       *out = 0;
512       return out - buffer;
513     case 4:
514       memcpy(out, &digits[0], 5), out += 5;
515       if (digits[5] != '0') {
516         *out++ = '.';
517         *out++ = digits[5];
518       }
519       *out = 0;
520       return out - buffer;
521     case 3:
522       memcpy(out, &digits[0], 4), out += 4;
523       if ((digits[5] | digits[4]) != '0') {
524         *out++ = '.';
525         *out++ = digits[4];
526         if (digits[5] != '0') *out++ = digits[5];
527       }
528       *out = 0;
529       return out - buffer;
530     case 2:
531       memcpy(out, &digits[0], 3), out += 3;
532       *out++ = '.';
533       memcpy(out, &digits[3], 3);
534       out += 3;
535       while (out[-1] == '0') --out;
536       if (out[-1] == '.') --out;
537       *out = 0;
538       return out - buffer;
539     case 1:
540       memcpy(out, &digits[0], 2), out += 2;
541       *out++ = '.';
542       memcpy(out, &digits[2], 4);
543       out += 4;
544       while (out[-1] == '0') --out;
545       if (out[-1] == '.') --out;
546       *out = 0;
547       return out - buffer;
548     case 0:
549       memcpy(out, &digits[0], 1), out += 1;
550       *out++ = '.';
551       memcpy(out, &digits[1], 5);
552       out += 5;
553       while (out[-1] == '0') --out;
554       if (out[-1] == '.') --out;
555       *out = 0;
556       return out - buffer;
557     case -4:
558       out[2] = '0';
559       ++out;
560       ABSL_FALLTHROUGH_INTENDED;
561     case -3:
562       out[2] = '0';
563       ++out;
564       ABSL_FALLTHROUGH_INTENDED;
565     case -2:
566       out[2] = '0';
567       ++out;
568       ABSL_FALLTHROUGH_INTENDED;
569     case -1:
570       out += 2;
571       memcpy(out, &digits[0], 6);
572       out += 6;
573       while (out[-1] == '0') --out;
574       *out = 0;
575       return out - buffer;
576   }
577   assert(exp < -4 || exp >= 6);
578   out[0] = digits[0];
579   assert(out[1] == '.');
580   out += 2;
581   memcpy(out, &digits[1], 5), out += 5;
582   while (out[-1] == '0') --out;
583   if (out[-1] == '.') --out;
584   *out++ = 'e';
585   if (exp > 0) {
586     *out++ = '+';
587   } else {
588     *out++ = '-';
589     exp = -exp;
590   }
591   if (exp > 99) {
592     int dig1 = exp / 100;
593     exp -= dig1 * 100;
594     *out++ = '0' + dig1;
595   }
596   PutTwoDigits(exp, out);
597   out += 2;
598   *out = 0;
599   return out - buffer;
600 }
601 
602 namespace {
603 // Represents integer values of digits.
604 // Uses 36 to indicate an invalid character since we support
605 // bases up to 36.
606 static const int8_t kAsciiToInt[256] = {
607     36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,  // 16 36s.
608     36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
609     36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 0,  1,  2,  3,  4,  5,
610     6,  7,  8,  9,  36, 36, 36, 36, 36, 36, 36, 10, 11, 12, 13, 14, 15, 16, 17,
611     18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
612     36, 36, 36, 36, 36, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
613     24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 36, 36, 36, 36, 36, 36,
614     36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
615     36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
616     36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
617     36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
618     36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
619     36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
620     36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36};
621 
622 // Parse the sign and optional hex or oct prefix in text.
safe_parse_sign_and_base(absl::string_view * text,int * base_ptr,bool * negative_ptr)623 inline bool safe_parse_sign_and_base(absl::string_view* text /*inout*/,
624                                      int* base_ptr /*inout*/,
625                                      bool* negative_ptr /*output*/) {
626   if (text->data() == nullptr) {
627     return false;
628   }
629 
630   const char* start = text->data();
631   const char* end = start + text->size();
632   int base = *base_ptr;
633 
634   // Consume whitespace.
635   while (start < end && absl::ascii_isspace(start[0])) {
636     ++start;
637   }
638   while (start < end && absl::ascii_isspace(end[-1])) {
639     --end;
640   }
641   if (start >= end) {
642     return false;
643   }
644 
645   // Consume sign.
646   *negative_ptr = (start[0] == '-');
647   if (*negative_ptr || start[0] == '+') {
648     ++start;
649     if (start >= end) {
650       return false;
651     }
652   }
653 
654   // Consume base-dependent prefix.
655   //  base 0: "0x" -> base 16, "0" -> base 8, default -> base 10
656   //  base 16: "0x" -> base 16
657   // Also validate the base.
658   if (base == 0) {
659     if (end - start >= 2 && start[0] == '0' &&
660         (start[1] == 'x' || start[1] == 'X')) {
661       base = 16;
662       start += 2;
663       if (start >= end) {
664         // "0x" with no digits after is invalid.
665         return false;
666       }
667     } else if (end - start >= 1 && start[0] == '0') {
668       base = 8;
669       start += 1;
670     } else {
671       base = 10;
672     }
673   } else if (base == 16) {
674     if (end - start >= 2 && start[0] == '0' &&
675         (start[1] == 'x' || start[1] == 'X')) {
676       start += 2;
677       if (start >= end) {
678         // "0x" with no digits after is invalid.
679         return false;
680       }
681     }
682   } else if (base >= 2 && base <= 36) {
683     // okay
684   } else {
685     return false;
686   }
687   *text = absl::string_view(start, end - start);
688   *base_ptr = base;
689   return true;
690 }
691 
692 // Consume digits.
693 //
694 // The classic loop:
695 //
696 //   for each digit
697 //     value = value * base + digit
698 //   value *= sign
699 //
700 // The classic loop needs overflow checking.  It also fails on the most
701 // negative integer, -2147483648 in 32-bit two's complement representation.
702 //
703 // My improved loop:
704 //
705 //  if (!negative)
706 //    for each digit
707 //      value = value * base
708 //      value = value + digit
709 //  else
710 //    for each digit
711 //      value = value * base
712 //      value = value - digit
713 //
714 // Overflow checking becomes simple.
715 
716 // Lookup tables per IntType:
717 // vmax/base and vmin/base are precomputed because division costs at least 8ns.
718 // TODO(junyer): Doing this per base instead (i.e. an array of structs, not a
719 // struct of arrays) would probably be better in terms of d-cache for the most
720 // commonly used bases.
721 template <typename IntType>
722 struct LookupTables {
723   ABSL_CONST_INIT static const IntType kVmaxOverBase[];
724   ABSL_CONST_INIT static const IntType kVminOverBase[];
725 };
726 
727 // An array initializer macro for X/base where base in [0, 36].
728 // However, note that lookups for base in [0, 1] should never happen because
729 // base has been validated to be in [2, 36] by safe_parse_sign_and_base().
730 #define X_OVER_BASE_INITIALIZER(X)                                        \
731   {                                                                       \
732     0, 0, X / 2, X / 3, X / 4, X / 5, X / 6, X / 7, X / 8, X / 9, X / 10, \
733         X / 11, X / 12, X / 13, X / 14, X / 15, X / 16, X / 17, X / 18,   \
734         X / 19, X / 20, X / 21, X / 22, X / 23, X / 24, X / 25, X / 26,   \
735         X / 27, X / 28, X / 29, X / 30, X / 31, X / 32, X / 33, X / 34,   \
736         X / 35, X / 36,                                                   \
737   }
738 
739 // uint128& operator/=(uint128) is not constexpr, so hardcode the resulting
740 // array to avoid a static initializer.
741 template <>
742 const uint128 LookupTables<uint128>::kVmaxOverBase[] = {
743     0,
744     0,
745     MakeUint128(9223372036854775807u, 18446744073709551615u),
746     MakeUint128(6148914691236517205u, 6148914691236517205u),
747     MakeUint128(4611686018427387903u, 18446744073709551615u),
748     MakeUint128(3689348814741910323u, 3689348814741910323u),
749     MakeUint128(3074457345618258602u, 12297829382473034410u),
750     MakeUint128(2635249153387078802u, 5270498306774157604u),
751     MakeUint128(2305843009213693951u, 18446744073709551615u),
752     MakeUint128(2049638230412172401u, 14347467612885206812u),
753     MakeUint128(1844674407370955161u, 11068046444225730969u),
754     MakeUint128(1676976733973595601u, 8384883669867978007u),
755     MakeUint128(1537228672809129301u, 6148914691236517205u),
756     MakeUint128(1418980313362273201u, 4256940940086819603u),
757     MakeUint128(1317624576693539401u, 2635249153387078802u),
758     MakeUint128(1229782938247303441u, 1229782938247303441u),
759     MakeUint128(1152921504606846975u, 18446744073709551615u),
760     MakeUint128(1085102592571150095u, 1085102592571150095u),
761     MakeUint128(1024819115206086200u, 16397105843297379214u),
762     MakeUint128(970881267037344821u, 16504981539634861972u),
763     MakeUint128(922337203685477580u, 14757395258967641292u),
764     MakeUint128(878416384462359600u, 14054662151397753612u),
765     MakeUint128(838488366986797800u, 13415813871788764811u),
766     MakeUint128(802032351030850070u, 4812194106185100421u),
767     MakeUint128(768614336404564650u, 12297829382473034410u),
768     MakeUint128(737869762948382064u, 11805916207174113034u),
769     MakeUint128(709490156681136600u, 11351842506898185609u),
770     MakeUint128(683212743470724133u, 17080318586768103348u),
771     MakeUint128(658812288346769700u, 10540996613548315209u),
772     MakeUint128(636094623231363848u, 15266270957552732371u),
773     MakeUint128(614891469123651720u, 9838263505978427528u),
774     MakeUint128(595056260442243600u, 9520900167075897608u),
775     MakeUint128(576460752303423487u, 18446744073709551615u),
776     MakeUint128(558992244657865200u, 8943875914525843207u),
777     MakeUint128(542551296285575047u, 9765923333140350855u),
778     MakeUint128(527049830677415760u, 8432797290838652167u),
779     MakeUint128(512409557603043100u, 8198552921648689607u),
780 };
781 
782 template <typename IntType>
783 const IntType LookupTables<IntType>::kVmaxOverBase[] =
784     X_OVER_BASE_INITIALIZER(std::numeric_limits<IntType>::max());
785 
786 template <typename IntType>
787 const IntType LookupTables<IntType>::kVminOverBase[] =
788     X_OVER_BASE_INITIALIZER(std::numeric_limits<IntType>::min());
789 
790 #undef X_OVER_BASE_INITIALIZER
791 
792 template <typename IntType>
safe_parse_positive_int(absl::string_view text,int base,IntType * value_p)793 inline bool safe_parse_positive_int(absl::string_view text, int base,
794                                     IntType* value_p) {
795   IntType value = 0;
796   const IntType vmax = std::numeric_limits<IntType>::max();
797   assert(vmax > 0);
798   assert(base >= 0);
799   assert(vmax >= static_cast<IntType>(base));
800   const IntType vmax_over_base = LookupTables<IntType>::kVmaxOverBase[base];
801   assert(base < 2 ||
802          std::numeric_limits<IntType>::max() / base == vmax_over_base);
803   const char* start = text.data();
804   const char* end = start + text.size();
805   // loop over digits
806   for (; start < end; ++start) {
807     unsigned char c = static_cast<unsigned char>(start[0]);
808     int digit = kAsciiToInt[c];
809     if (digit >= base) {
810       *value_p = value;
811       return false;
812     }
813     if (value > vmax_over_base) {
814       *value_p = vmax;
815       return false;
816     }
817     value *= base;
818     if (value > vmax - digit) {
819       *value_p = vmax;
820       return false;
821     }
822     value += digit;
823   }
824   *value_p = value;
825   return true;
826 }
827 
828 template <typename IntType>
safe_parse_negative_int(absl::string_view text,int base,IntType * value_p)829 inline bool safe_parse_negative_int(absl::string_view text, int base,
830                                     IntType* value_p) {
831   IntType value = 0;
832   const IntType vmin = std::numeric_limits<IntType>::min();
833   assert(vmin < 0);
834   assert(vmin <= 0 - base);
835   IntType vmin_over_base = LookupTables<IntType>::kVminOverBase[base];
836   assert(base < 2 ||
837          std::numeric_limits<IntType>::min() / base == vmin_over_base);
838   // 2003 c++ standard [expr.mul]
839   // "... the sign of the remainder is implementation-defined."
840   // Although (vmin/base)*base + vmin%base is always vmin.
841   // 2011 c++ standard tightens the spec but we cannot rely on it.
842   // TODO(junyer): Handle this in the lookup table generation.
843   if (vmin % base > 0) {
844     vmin_over_base += 1;
845   }
846   const char* start = text.data();
847   const char* end = start + text.size();
848   // loop over digits
849   for (; start < end; ++start) {
850     unsigned char c = static_cast<unsigned char>(start[0]);
851     int digit = kAsciiToInt[c];
852     if (digit >= base) {
853       *value_p = value;
854       return false;
855     }
856     if (value < vmin_over_base) {
857       *value_p = vmin;
858       return false;
859     }
860     value *= base;
861     if (value < vmin + digit) {
862       *value_p = vmin;
863       return false;
864     }
865     value -= digit;
866   }
867   *value_p = value;
868   return true;
869 }
870 
871 // Input format based on POSIX.1-2008 strtol
872 // http://pubs.opengroup.org/onlinepubs/9699919799/functions/strtol.html
873 template <typename IntType>
safe_int_internal(absl::string_view text,IntType * value_p,int base)874 inline bool safe_int_internal(absl::string_view text, IntType* value_p,
875                               int base) {
876   *value_p = 0;
877   bool negative;
878   if (!safe_parse_sign_and_base(&text, &base, &negative)) {
879     return false;
880   }
881   if (!negative) {
882     return safe_parse_positive_int(text, base, value_p);
883   } else {
884     return safe_parse_negative_int(text, base, value_p);
885   }
886 }
887 
888 template <typename IntType>
safe_uint_internal(absl::string_view text,IntType * value_p,int base)889 inline bool safe_uint_internal(absl::string_view text, IntType* value_p,
890                                int base) {
891   *value_p = 0;
892   bool negative;
893   if (!safe_parse_sign_and_base(&text, &base, &negative) || negative) {
894     return false;
895   }
896   return safe_parse_positive_int(text, base, value_p);
897 }
898 }  // anonymous namespace
899 
900 namespace numbers_internal {
901 
902 // Digit conversion.
903 ABSL_CONST_INIT ABSL_DLL const char kHexChar[] =
904     "0123456789abcdef";
905 
906 ABSL_CONST_INIT ABSL_DLL const char kHexTable[513] =
907     "000102030405060708090a0b0c0d0e0f"
908     "101112131415161718191a1b1c1d1e1f"
909     "202122232425262728292a2b2c2d2e2f"
910     "303132333435363738393a3b3c3d3e3f"
911     "404142434445464748494a4b4c4d4e4f"
912     "505152535455565758595a5b5c5d5e5f"
913     "606162636465666768696a6b6c6d6e6f"
914     "707172737475767778797a7b7c7d7e7f"
915     "808182838485868788898a8b8c8d8e8f"
916     "909192939495969798999a9b9c9d9e9f"
917     "a0a1a2a3a4a5a6a7a8a9aaabacadaeaf"
918     "b0b1b2b3b4b5b6b7b8b9babbbcbdbebf"
919     "c0c1c2c3c4c5c6c7c8c9cacbcccdcecf"
920     "d0d1d2d3d4d5d6d7d8d9dadbdcdddedf"
921     "e0e1e2e3e4e5e6e7e8e9eaebecedeeef"
922     "f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff";
923 
924 ABSL_CONST_INIT ABSL_DLL const char two_ASCII_digits[100][2] = {
925     {'0', '0'}, {'0', '1'}, {'0', '2'}, {'0', '3'}, {'0', '4'}, {'0', '5'},
926     {'0', '6'}, {'0', '7'}, {'0', '8'}, {'0', '9'}, {'1', '0'}, {'1', '1'},
927     {'1', '2'}, {'1', '3'}, {'1', '4'}, {'1', '5'}, {'1', '6'}, {'1', '7'},
928     {'1', '8'}, {'1', '9'}, {'2', '0'}, {'2', '1'}, {'2', '2'}, {'2', '3'},
929     {'2', '4'}, {'2', '5'}, {'2', '6'}, {'2', '7'}, {'2', '8'}, {'2', '9'},
930     {'3', '0'}, {'3', '1'}, {'3', '2'}, {'3', '3'}, {'3', '4'}, {'3', '5'},
931     {'3', '6'}, {'3', '7'}, {'3', '8'}, {'3', '9'}, {'4', '0'}, {'4', '1'},
932     {'4', '2'}, {'4', '3'}, {'4', '4'}, {'4', '5'}, {'4', '6'}, {'4', '7'},
933     {'4', '8'}, {'4', '9'}, {'5', '0'}, {'5', '1'}, {'5', '2'}, {'5', '3'},
934     {'5', '4'}, {'5', '5'}, {'5', '6'}, {'5', '7'}, {'5', '8'}, {'5', '9'},
935     {'6', '0'}, {'6', '1'}, {'6', '2'}, {'6', '3'}, {'6', '4'}, {'6', '5'},
936     {'6', '6'}, {'6', '7'}, {'6', '8'}, {'6', '9'}, {'7', '0'}, {'7', '1'},
937     {'7', '2'}, {'7', '3'}, {'7', '4'}, {'7', '5'}, {'7', '6'}, {'7', '7'},
938     {'7', '8'}, {'7', '9'}, {'8', '0'}, {'8', '1'}, {'8', '2'}, {'8', '3'},
939     {'8', '4'}, {'8', '5'}, {'8', '6'}, {'8', '7'}, {'8', '8'}, {'8', '9'},
940     {'9', '0'}, {'9', '1'}, {'9', '2'}, {'9', '3'}, {'9', '4'}, {'9', '5'},
941     {'9', '6'}, {'9', '7'}, {'9', '8'}, {'9', '9'}};
942 
safe_strto32_base(absl::string_view text,int32_t * value,int base)943 bool safe_strto32_base(absl::string_view text, int32_t* value, int base) {
944   return safe_int_internal<int32_t>(text, value, base);
945 }
946 
safe_strto64_base(absl::string_view text,int64_t * value,int base)947 bool safe_strto64_base(absl::string_view text, int64_t* value, int base) {
948   return safe_int_internal<int64_t>(text, value, base);
949 }
950 
safe_strtou32_base(absl::string_view text,uint32_t * value,int base)951 bool safe_strtou32_base(absl::string_view text, uint32_t* value, int base) {
952   return safe_uint_internal<uint32_t>(text, value, base);
953 }
954 
safe_strtou64_base(absl::string_view text,uint64_t * value,int base)955 bool safe_strtou64_base(absl::string_view text, uint64_t* value, int base) {
956   return safe_uint_internal<uint64_t>(text, value, base);
957 }
958 
safe_strtou128_base(absl::string_view text,uint128 * value,int base)959 bool safe_strtou128_base(absl::string_view text, uint128* value, int base) {
960   return safe_uint_internal<absl::uint128>(text, value, base);
961 }
962 
963 }  // namespace numbers_internal
964 ABSL_NAMESPACE_END
965 }  // namespace absl
966