1 // Copyright 2017 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 // This file contains string processing functions related to
16 // numeric values.
17
18 #include "absl/strings/numbers.h"
19
20 #include <algorithm>
21 #include <cassert>
22 #include <cfloat> // for DBL_DIG and FLT_DIG
23 #include <cmath> // for HUGE_VAL
24 #include <cstdint>
25 #include <cstdio>
26 #include <cstdlib>
27 #include <cstring>
28 #include <iterator>
29 #include <limits>
30 #include <memory>
31 #include <utility>
32
33 #include "absl/base/attributes.h"
34 #include "absl/base/internal/bits.h"
35 #include "absl/base/internal/raw_logging.h"
36 #include "absl/strings/ascii.h"
37 #include "absl/strings/charconv.h"
38 #include "absl/strings/escaping.h"
39 #include "absl/strings/internal/memutil.h"
40 #include "absl/strings/match.h"
41 #include "absl/strings/str_cat.h"
42
43 namespace absl {
44 ABSL_NAMESPACE_BEGIN
45
SimpleAtof(absl::string_view str,float * out)46 bool SimpleAtof(absl::string_view str, float* out) {
47 *out = 0.0;
48 str = StripAsciiWhitespace(str);
49 if (!str.empty() && str[0] == '+') {
50 str.remove_prefix(1);
51 }
52 auto result = absl::from_chars(str.data(), str.data() + str.size(), *out);
53 if (result.ec == std::errc::invalid_argument) {
54 return false;
55 }
56 if (result.ptr != str.data() + str.size()) {
57 // not all non-whitespace characters consumed
58 return false;
59 }
60 // from_chars() with DR 3081's current wording will return max() on
61 // overflow. SimpleAtof returns infinity instead.
62 if (result.ec == std::errc::result_out_of_range) {
63 if (*out > 1.0) {
64 *out = std::numeric_limits<float>::infinity();
65 } else if (*out < -1.0) {
66 *out = -std::numeric_limits<float>::infinity();
67 }
68 }
69 return true;
70 }
71
SimpleAtod(absl::string_view str,double * out)72 bool SimpleAtod(absl::string_view str, double* out) {
73 *out = 0.0;
74 str = StripAsciiWhitespace(str);
75 if (!str.empty() && str[0] == '+') {
76 str.remove_prefix(1);
77 }
78 auto result = absl::from_chars(str.data(), str.data() + str.size(), *out);
79 if (result.ec == std::errc::invalid_argument) {
80 return false;
81 }
82 if (result.ptr != str.data() + str.size()) {
83 // not all non-whitespace characters consumed
84 return false;
85 }
86 // from_chars() with DR 3081's current wording will return max() on
87 // overflow. SimpleAtod returns infinity instead.
88 if (result.ec == std::errc::result_out_of_range) {
89 if (*out > 1.0) {
90 *out = std::numeric_limits<double>::infinity();
91 } else if (*out < -1.0) {
92 *out = -std::numeric_limits<double>::infinity();
93 }
94 }
95 return true;
96 }
97
SimpleAtob(absl::string_view str,bool * out)98 bool SimpleAtob(absl::string_view str, bool* out) {
99 ABSL_RAW_CHECK(out != nullptr, "Output pointer must not be nullptr.");
100 if (EqualsIgnoreCase(str, "true") || EqualsIgnoreCase(str, "t") ||
101 EqualsIgnoreCase(str, "yes") || EqualsIgnoreCase(str, "y") ||
102 EqualsIgnoreCase(str, "1")) {
103 *out = true;
104 return true;
105 }
106 if (EqualsIgnoreCase(str, "false") || EqualsIgnoreCase(str, "f") ||
107 EqualsIgnoreCase(str, "no") || EqualsIgnoreCase(str, "n") ||
108 EqualsIgnoreCase(str, "0")) {
109 *out = false;
110 return true;
111 }
112 return false;
113 }
114
115 // ----------------------------------------------------------------------
116 // FastIntToBuffer() overloads
117 //
118 // Like the Fast*ToBuffer() functions above, these are intended for speed.
119 // Unlike the Fast*ToBuffer() functions, however, these functions write
120 // their output to the beginning of the buffer. The caller is responsible
121 // for ensuring that the buffer has enough space to hold the output.
122 //
123 // Returns a pointer to the end of the string (i.e. the null character
124 // terminating the string).
125 // ----------------------------------------------------------------------
126
127 namespace {
128
129 // Used to optimize printing a decimal number's final digit.
130 const char one_ASCII_final_digits[10][2] {
131 {'0', 0}, {'1', 0}, {'2', 0}, {'3', 0}, {'4', 0},
132 {'5', 0}, {'6', 0}, {'7', 0}, {'8', 0}, {'9', 0},
133 };
134
135 } // namespace
136
FastIntToBuffer(uint32_t i,char * buffer)137 char* numbers_internal::FastIntToBuffer(uint32_t i, char* buffer) {
138 uint32_t digits;
139 // The idea of this implementation is to trim the number of divides to as few
140 // as possible, and also reducing memory stores and branches, by going in
141 // steps of two digits at a time rather than one whenever possible.
142 // The huge-number case is first, in the hopes that the compiler will output
143 // that case in one branch-free block of code, and only output conditional
144 // branches into it from below.
145 if (i >= 1000000000) { // >= 1,000,000,000
146 digits = i / 100000000; // 100,000,000
147 i -= digits * 100000000;
148 PutTwoDigits(digits, buffer);
149 buffer += 2;
150 lt100_000_000:
151 digits = i / 1000000; // 1,000,000
152 i -= digits * 1000000;
153 PutTwoDigits(digits, buffer);
154 buffer += 2;
155 lt1_000_000:
156 digits = i / 10000; // 10,000
157 i -= digits * 10000;
158 PutTwoDigits(digits, buffer);
159 buffer += 2;
160 lt10_000:
161 digits = i / 100;
162 i -= digits * 100;
163 PutTwoDigits(digits, buffer);
164 buffer += 2;
165 lt100:
166 digits = i;
167 PutTwoDigits(digits, buffer);
168 buffer += 2;
169 *buffer = 0;
170 return buffer;
171 }
172
173 if (i < 100) {
174 digits = i;
175 if (i >= 10) goto lt100;
176 memcpy(buffer, one_ASCII_final_digits[i], 2);
177 return buffer + 1;
178 }
179 if (i < 10000) { // 10,000
180 if (i >= 1000) goto lt10_000;
181 digits = i / 100;
182 i -= digits * 100;
183 *buffer++ = '0' + digits;
184 goto lt100;
185 }
186 if (i < 1000000) { // 1,000,000
187 if (i >= 100000) goto lt1_000_000;
188 digits = i / 10000; // 10,000
189 i -= digits * 10000;
190 *buffer++ = '0' + digits;
191 goto lt10_000;
192 }
193 if (i < 100000000) { // 100,000,000
194 if (i >= 10000000) goto lt100_000_000;
195 digits = i / 1000000; // 1,000,000
196 i -= digits * 1000000;
197 *buffer++ = '0' + digits;
198 goto lt1_000_000;
199 }
200 // we already know that i < 1,000,000,000
201 digits = i / 100000000; // 100,000,000
202 i -= digits * 100000000;
203 *buffer++ = '0' + digits;
204 goto lt100_000_000;
205 }
206
FastIntToBuffer(int32_t i,char * buffer)207 char* numbers_internal::FastIntToBuffer(int32_t i, char* buffer) {
208 uint32_t u = i;
209 if (i < 0) {
210 *buffer++ = '-';
211 // We need to do the negation in modular (i.e., "unsigned")
212 // arithmetic; MSVC++ apprently warns for plain "-u", so
213 // we write the equivalent expression "0 - u" instead.
214 u = 0 - u;
215 }
216 return numbers_internal::FastIntToBuffer(u, buffer);
217 }
218
FastIntToBuffer(uint64_t i,char * buffer)219 char* numbers_internal::FastIntToBuffer(uint64_t i, char* buffer) {
220 uint32_t u32 = static_cast<uint32_t>(i);
221 if (u32 == i) return numbers_internal::FastIntToBuffer(u32, buffer);
222
223 // Here we know i has at least 10 decimal digits.
224 uint64_t top_1to11 = i / 1000000000;
225 u32 = static_cast<uint32_t>(i - top_1to11 * 1000000000);
226 uint32_t top_1to11_32 = static_cast<uint32_t>(top_1to11);
227
228 if (top_1to11_32 == top_1to11) {
229 buffer = numbers_internal::FastIntToBuffer(top_1to11_32, buffer);
230 } else {
231 // top_1to11 has more than 32 bits too; print it in two steps.
232 uint32_t top_8to9 = static_cast<uint32_t>(top_1to11 / 100);
233 uint32_t mid_2 = static_cast<uint32_t>(top_1to11 - top_8to9 * 100);
234 buffer = numbers_internal::FastIntToBuffer(top_8to9, buffer);
235 PutTwoDigits(mid_2, buffer);
236 buffer += 2;
237 }
238
239 // We have only 9 digits now, again the maximum uint32_t can handle fully.
240 uint32_t digits = u32 / 10000000; // 10,000,000
241 u32 -= digits * 10000000;
242 PutTwoDigits(digits, buffer);
243 buffer += 2;
244 digits = u32 / 100000; // 100,000
245 u32 -= digits * 100000;
246 PutTwoDigits(digits, buffer);
247 buffer += 2;
248 digits = u32 / 1000; // 1,000
249 u32 -= digits * 1000;
250 PutTwoDigits(digits, buffer);
251 buffer += 2;
252 digits = u32 / 10;
253 u32 -= digits * 10;
254 PutTwoDigits(digits, buffer);
255 buffer += 2;
256 memcpy(buffer, one_ASCII_final_digits[u32], 2);
257 return buffer + 1;
258 }
259
FastIntToBuffer(int64_t i,char * buffer)260 char* numbers_internal::FastIntToBuffer(int64_t i, char* buffer) {
261 uint64_t u = i;
262 if (i < 0) {
263 *buffer++ = '-';
264 u = 0 - u;
265 }
266 return numbers_internal::FastIntToBuffer(u, buffer);
267 }
268
269 // Given a 128-bit number expressed as a pair of uint64_t, high half first,
270 // return that number multiplied by the given 32-bit value. If the result is
271 // too large to fit in a 128-bit number, divide it by 2 until it fits.
Mul32(std::pair<uint64_t,uint64_t> num,uint32_t mul)272 static std::pair<uint64_t, uint64_t> Mul32(std::pair<uint64_t, uint64_t> num,
273 uint32_t mul) {
274 uint64_t bits0_31 = num.second & 0xFFFFFFFF;
275 uint64_t bits32_63 = num.second >> 32;
276 uint64_t bits64_95 = num.first & 0xFFFFFFFF;
277 uint64_t bits96_127 = num.first >> 32;
278
279 // The picture so far: each of these 64-bit values has only the lower 32 bits
280 // filled in.
281 // bits96_127: [ 00000000 xxxxxxxx ]
282 // bits64_95: [ 00000000 xxxxxxxx ]
283 // bits32_63: [ 00000000 xxxxxxxx ]
284 // bits0_31: [ 00000000 xxxxxxxx ]
285
286 bits0_31 *= mul;
287 bits32_63 *= mul;
288 bits64_95 *= mul;
289 bits96_127 *= mul;
290
291 // Now the top halves may also have value, though all 64 of their bits will
292 // never be set at the same time, since they are a result of a 32x32 bit
293 // multiply. This makes the carry calculation slightly easier.
294 // bits96_127: [ mmmmmmmm | mmmmmmmm ]
295 // bits64_95: [ | mmmmmmmm mmmmmmmm | ]
296 // bits32_63: | [ mmmmmmmm | mmmmmmmm ]
297 // bits0_31: | [ | mmmmmmmm mmmmmmmm ]
298 // eventually: [ bits128_up | ...bits64_127.... | ..bits0_63... ]
299
300 uint64_t bits0_63 = bits0_31 + (bits32_63 << 32);
301 uint64_t bits64_127 = bits64_95 + (bits96_127 << 32) + (bits32_63 >> 32) +
302 (bits0_63 < bits0_31);
303 uint64_t bits128_up = (bits96_127 >> 32) + (bits64_127 < bits64_95);
304 if (bits128_up == 0) return {bits64_127, bits0_63};
305
306 int shift = 64 - base_internal::CountLeadingZeros64(bits128_up);
307 uint64_t lo = (bits0_63 >> shift) + (bits64_127 << (64 - shift));
308 uint64_t hi = (bits64_127 >> shift) + (bits128_up << (64 - shift));
309 return {hi, lo};
310 }
311
312 // Compute num * 5 ^ expfive, and return the first 128 bits of the result,
313 // where the first bit is always a one. So PowFive(1, 0) starts 0b100000,
314 // PowFive(1, 1) starts 0b101000, PowFive(1, 2) starts 0b110010, etc.
PowFive(uint64_t num,int expfive)315 static std::pair<uint64_t, uint64_t> PowFive(uint64_t num, int expfive) {
316 std::pair<uint64_t, uint64_t> result = {num, 0};
317 while (expfive >= 13) {
318 // 5^13 is the highest power of five that will fit in a 32-bit integer.
319 result = Mul32(result, 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5);
320 expfive -= 13;
321 }
322 constexpr int powers_of_five[13] = {
323 1,
324 5,
325 5 * 5,
326 5 * 5 * 5,
327 5 * 5 * 5 * 5,
328 5 * 5 * 5 * 5 * 5,
329 5 * 5 * 5 * 5 * 5 * 5,
330 5 * 5 * 5 * 5 * 5 * 5 * 5,
331 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
332 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
333 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
334 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
335 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5};
336 result = Mul32(result, powers_of_five[expfive & 15]);
337 int shift = base_internal::CountLeadingZeros64(result.first);
338 if (shift != 0) {
339 result.first = (result.first << shift) + (result.second >> (64 - shift));
340 result.second = (result.second << shift);
341 }
342 return result;
343 }
344
345 struct ExpDigits {
346 int32_t exponent;
347 char digits[6];
348 };
349
350 // SplitToSix converts value, a positive double-precision floating-point number,
351 // into a base-10 exponent and 6 ASCII digits, where the first digit is never
352 // zero. For example, SplitToSix(1) returns an exponent of zero and a digits
353 // array of {'1', '0', '0', '0', '0', '0'}. If value is exactly halfway between
354 // two possible representations, e.g. value = 100000.5, then "round to even" is
355 // performed.
SplitToSix(const double value)356 static ExpDigits SplitToSix(const double value) {
357 ExpDigits exp_dig;
358 int exp = 5;
359 double d = value;
360 // First step: calculate a close approximation of the output, where the
361 // value d will be between 100,000 and 999,999, representing the digits
362 // in the output ASCII array, and exp is the base-10 exponent. It would be
363 // faster to use a table here, and to look up the base-2 exponent of value,
364 // however value is an IEEE-754 64-bit number, so the table would have 2,000
365 // entries, which is not cache-friendly.
366 if (d >= 999999.5) {
367 if (d >= 1e+261) exp += 256, d *= 1e-256;
368 if (d >= 1e+133) exp += 128, d *= 1e-128;
369 if (d >= 1e+69) exp += 64, d *= 1e-64;
370 if (d >= 1e+37) exp += 32, d *= 1e-32;
371 if (d >= 1e+21) exp += 16, d *= 1e-16;
372 if (d >= 1e+13) exp += 8, d *= 1e-8;
373 if (d >= 1e+9) exp += 4, d *= 1e-4;
374 if (d >= 1e+7) exp += 2, d *= 1e-2;
375 if (d >= 1e+6) exp += 1, d *= 1e-1;
376 } else {
377 if (d < 1e-250) exp -= 256, d *= 1e256;
378 if (d < 1e-122) exp -= 128, d *= 1e128;
379 if (d < 1e-58) exp -= 64, d *= 1e64;
380 if (d < 1e-26) exp -= 32, d *= 1e32;
381 if (d < 1e-10) exp -= 16, d *= 1e16;
382 if (d < 1e-2) exp -= 8, d *= 1e8;
383 if (d < 1e+2) exp -= 4, d *= 1e4;
384 if (d < 1e+4) exp -= 2, d *= 1e2;
385 if (d < 1e+5) exp -= 1, d *= 1e1;
386 }
387 // At this point, d is in the range [99999.5..999999.5) and exp is in the
388 // range [-324..308]. Since we need to round d up, we want to add a half
389 // and truncate.
390 // However, the technique above may have lost some precision, due to its
391 // repeated multiplication by constants that each may be off by half a bit
392 // of precision. This only matters if we're close to the edge though.
393 // Since we'd like to know if the fractional part of d is close to a half,
394 // we multiply it by 65536 and see if the fractional part is close to 32768.
395 // (The number doesn't have to be a power of two,but powers of two are faster)
396 uint64_t d64k = d * 65536;
397 int dddddd; // A 6-digit decimal integer.
398 if ((d64k % 65536) == 32767 || (d64k % 65536) == 32768) {
399 // OK, it's fairly likely that precision was lost above, which is
400 // not a surprise given only 52 mantissa bits are available. Therefore
401 // redo the calculation using 128-bit numbers. (64 bits are not enough).
402
403 // Start out with digits rounded down; maybe add one below.
404 dddddd = static_cast<int>(d64k / 65536);
405
406 // mantissa is a 64-bit integer representing M.mmm... * 2^63. The actual
407 // value we're representing, of course, is M.mmm... * 2^exp2.
408 int exp2;
409 double m = std::frexp(value, &exp2);
410 uint64_t mantissa = m * (32768.0 * 65536.0 * 65536.0 * 65536.0);
411 // std::frexp returns an m value in the range [0.5, 1.0), however we
412 // can't multiply it by 2^64 and convert to an integer because some FPUs
413 // throw an exception when converting an number higher than 2^63 into an
414 // integer - even an unsigned 64-bit integer! Fortunately it doesn't matter
415 // since m only has 52 significant bits anyway.
416 mantissa <<= 1;
417 exp2 -= 64; // not needed, but nice for debugging
418
419 // OK, we are here to compare:
420 // (dddddd + 0.5) * 10^(exp-5) vs. mantissa * 2^exp2
421 // so we can round up dddddd if appropriate. Those values span the full
422 // range of 600 orders of magnitude of IEE 64-bit floating-point.
423 // Fortunately, we already know they are very close, so we don't need to
424 // track the base-2 exponent of both sides. This greatly simplifies the
425 // the math since the 2^exp2 calculation is unnecessary and the power-of-10
426 // calculation can become a power-of-5 instead.
427
428 std::pair<uint64_t, uint64_t> edge, val;
429 if (exp >= 6) {
430 // Compare (dddddd + 0.5) * 5 ^ (exp - 5) to mantissa
431 // Since we're tossing powers of two, 2 * dddddd + 1 is the
432 // same as dddddd + 0.5
433 edge = PowFive(2 * dddddd + 1, exp - 5);
434
435 val.first = mantissa;
436 val.second = 0;
437 } else {
438 // We can't compare (dddddd + 0.5) * 5 ^ (exp - 5) to mantissa as we did
439 // above because (exp - 5) is negative. So we compare (dddddd + 0.5) to
440 // mantissa * 5 ^ (5 - exp)
441 edge = PowFive(2 * dddddd + 1, 0);
442
443 val = PowFive(mantissa, 5 - exp);
444 }
445 // printf("exp=%d %016lx %016lx vs %016lx %016lx\n", exp, val.first,
446 // val.second, edge.first, edge.second);
447 if (val > edge) {
448 dddddd++;
449 } else if (val == edge) {
450 dddddd += (dddddd & 1);
451 }
452 } else {
453 // Here, we are not close to the edge.
454 dddddd = static_cast<int>((d64k + 32768) / 65536);
455 }
456 if (dddddd == 1000000) {
457 dddddd = 100000;
458 exp += 1;
459 }
460 exp_dig.exponent = exp;
461
462 int two_digits = dddddd / 10000;
463 dddddd -= two_digits * 10000;
464 numbers_internal::PutTwoDigits(two_digits, &exp_dig.digits[0]);
465
466 two_digits = dddddd / 100;
467 dddddd -= two_digits * 100;
468 numbers_internal::PutTwoDigits(two_digits, &exp_dig.digits[2]);
469
470 numbers_internal::PutTwoDigits(dddddd, &exp_dig.digits[4]);
471 return exp_dig;
472 }
473
474 // Helper function for fast formatting of floating-point.
475 // The result is the same as "%g", a.k.a. "%.6g".
SixDigitsToBuffer(double d,char * const buffer)476 size_t numbers_internal::SixDigitsToBuffer(double d, char* const buffer) {
477 static_assert(std::numeric_limits<float>::is_iec559,
478 "IEEE-754/IEC-559 support only");
479
480 char* out = buffer; // we write data to out, incrementing as we go, but
481 // FloatToBuffer always returns the address of the buffer
482 // passed in.
483
484 if (std::isnan(d)) {
485 strcpy(out, "nan"); // NOLINT(runtime/printf)
486 return 3;
487 }
488 if (d == 0) { // +0 and -0 are handled here
489 if (std::signbit(d)) *out++ = '-';
490 *out++ = '0';
491 *out = 0;
492 return out - buffer;
493 }
494 if (d < 0) {
495 *out++ = '-';
496 d = -d;
497 }
498 if (std::isinf(d)) {
499 strcpy(out, "inf"); // NOLINT(runtime/printf)
500 return out + 3 - buffer;
501 }
502
503 auto exp_dig = SplitToSix(d);
504 int exp = exp_dig.exponent;
505 const char* digits = exp_dig.digits;
506 out[0] = '0';
507 out[1] = '.';
508 switch (exp) {
509 case 5:
510 memcpy(out, &digits[0], 6), out += 6;
511 *out = 0;
512 return out - buffer;
513 case 4:
514 memcpy(out, &digits[0], 5), out += 5;
515 if (digits[5] != '0') {
516 *out++ = '.';
517 *out++ = digits[5];
518 }
519 *out = 0;
520 return out - buffer;
521 case 3:
522 memcpy(out, &digits[0], 4), out += 4;
523 if ((digits[5] | digits[4]) != '0') {
524 *out++ = '.';
525 *out++ = digits[4];
526 if (digits[5] != '0') *out++ = digits[5];
527 }
528 *out = 0;
529 return out - buffer;
530 case 2:
531 memcpy(out, &digits[0], 3), out += 3;
532 *out++ = '.';
533 memcpy(out, &digits[3], 3);
534 out += 3;
535 while (out[-1] == '0') --out;
536 if (out[-1] == '.') --out;
537 *out = 0;
538 return out - buffer;
539 case 1:
540 memcpy(out, &digits[0], 2), out += 2;
541 *out++ = '.';
542 memcpy(out, &digits[2], 4);
543 out += 4;
544 while (out[-1] == '0') --out;
545 if (out[-1] == '.') --out;
546 *out = 0;
547 return out - buffer;
548 case 0:
549 memcpy(out, &digits[0], 1), out += 1;
550 *out++ = '.';
551 memcpy(out, &digits[1], 5);
552 out += 5;
553 while (out[-1] == '0') --out;
554 if (out[-1] == '.') --out;
555 *out = 0;
556 return out - buffer;
557 case -4:
558 out[2] = '0';
559 ++out;
560 ABSL_FALLTHROUGH_INTENDED;
561 case -3:
562 out[2] = '0';
563 ++out;
564 ABSL_FALLTHROUGH_INTENDED;
565 case -2:
566 out[2] = '0';
567 ++out;
568 ABSL_FALLTHROUGH_INTENDED;
569 case -1:
570 out += 2;
571 memcpy(out, &digits[0], 6);
572 out += 6;
573 while (out[-1] == '0') --out;
574 *out = 0;
575 return out - buffer;
576 }
577 assert(exp < -4 || exp >= 6);
578 out[0] = digits[0];
579 assert(out[1] == '.');
580 out += 2;
581 memcpy(out, &digits[1], 5), out += 5;
582 while (out[-1] == '0') --out;
583 if (out[-1] == '.') --out;
584 *out++ = 'e';
585 if (exp > 0) {
586 *out++ = '+';
587 } else {
588 *out++ = '-';
589 exp = -exp;
590 }
591 if (exp > 99) {
592 int dig1 = exp / 100;
593 exp -= dig1 * 100;
594 *out++ = '0' + dig1;
595 }
596 PutTwoDigits(exp, out);
597 out += 2;
598 *out = 0;
599 return out - buffer;
600 }
601
602 namespace {
603 // Represents integer values of digits.
604 // Uses 36 to indicate an invalid character since we support
605 // bases up to 36.
606 static const int8_t kAsciiToInt[256] = {
607 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, // 16 36s.
608 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
609 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 0, 1, 2, 3, 4, 5,
610 6, 7, 8, 9, 36, 36, 36, 36, 36, 36, 36, 10, 11, 12, 13, 14, 15, 16, 17,
611 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
612 36, 36, 36, 36, 36, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
613 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 36, 36, 36, 36, 36, 36,
614 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
615 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
616 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
617 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
618 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
619 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
620 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36};
621
622 // Parse the sign and optional hex or oct prefix in text.
safe_parse_sign_and_base(absl::string_view * text,int * base_ptr,bool * negative_ptr)623 inline bool safe_parse_sign_and_base(absl::string_view* text /*inout*/,
624 int* base_ptr /*inout*/,
625 bool* negative_ptr /*output*/) {
626 if (text->data() == nullptr) {
627 return false;
628 }
629
630 const char* start = text->data();
631 const char* end = start + text->size();
632 int base = *base_ptr;
633
634 // Consume whitespace.
635 while (start < end && absl::ascii_isspace(start[0])) {
636 ++start;
637 }
638 while (start < end && absl::ascii_isspace(end[-1])) {
639 --end;
640 }
641 if (start >= end) {
642 return false;
643 }
644
645 // Consume sign.
646 *negative_ptr = (start[0] == '-');
647 if (*negative_ptr || start[0] == '+') {
648 ++start;
649 if (start >= end) {
650 return false;
651 }
652 }
653
654 // Consume base-dependent prefix.
655 // base 0: "0x" -> base 16, "0" -> base 8, default -> base 10
656 // base 16: "0x" -> base 16
657 // Also validate the base.
658 if (base == 0) {
659 if (end - start >= 2 && start[0] == '0' &&
660 (start[1] == 'x' || start[1] == 'X')) {
661 base = 16;
662 start += 2;
663 if (start >= end) {
664 // "0x" with no digits after is invalid.
665 return false;
666 }
667 } else if (end - start >= 1 && start[0] == '0') {
668 base = 8;
669 start += 1;
670 } else {
671 base = 10;
672 }
673 } else if (base == 16) {
674 if (end - start >= 2 && start[0] == '0' &&
675 (start[1] == 'x' || start[1] == 'X')) {
676 start += 2;
677 if (start >= end) {
678 // "0x" with no digits after is invalid.
679 return false;
680 }
681 }
682 } else if (base >= 2 && base <= 36) {
683 // okay
684 } else {
685 return false;
686 }
687 *text = absl::string_view(start, end - start);
688 *base_ptr = base;
689 return true;
690 }
691
692 // Consume digits.
693 //
694 // The classic loop:
695 //
696 // for each digit
697 // value = value * base + digit
698 // value *= sign
699 //
700 // The classic loop needs overflow checking. It also fails on the most
701 // negative integer, -2147483648 in 32-bit two's complement representation.
702 //
703 // My improved loop:
704 //
705 // if (!negative)
706 // for each digit
707 // value = value * base
708 // value = value + digit
709 // else
710 // for each digit
711 // value = value * base
712 // value = value - digit
713 //
714 // Overflow checking becomes simple.
715
716 // Lookup tables per IntType:
717 // vmax/base and vmin/base are precomputed because division costs at least 8ns.
718 // TODO(junyer): Doing this per base instead (i.e. an array of structs, not a
719 // struct of arrays) would probably be better in terms of d-cache for the most
720 // commonly used bases.
721 template <typename IntType>
722 struct LookupTables {
723 ABSL_CONST_INIT static const IntType kVmaxOverBase[];
724 ABSL_CONST_INIT static const IntType kVminOverBase[];
725 };
726
727 // An array initializer macro for X/base where base in [0, 36].
728 // However, note that lookups for base in [0, 1] should never happen because
729 // base has been validated to be in [2, 36] by safe_parse_sign_and_base().
730 #define X_OVER_BASE_INITIALIZER(X) \
731 { \
732 0, 0, X / 2, X / 3, X / 4, X / 5, X / 6, X / 7, X / 8, X / 9, X / 10, \
733 X / 11, X / 12, X / 13, X / 14, X / 15, X / 16, X / 17, X / 18, \
734 X / 19, X / 20, X / 21, X / 22, X / 23, X / 24, X / 25, X / 26, \
735 X / 27, X / 28, X / 29, X / 30, X / 31, X / 32, X / 33, X / 34, \
736 X / 35, X / 36, \
737 }
738
739 // uint128& operator/=(uint128) is not constexpr, so hardcode the resulting
740 // array to avoid a static initializer.
741 template <>
742 const uint128 LookupTables<uint128>::kVmaxOverBase[] = {
743 0,
744 0,
745 MakeUint128(9223372036854775807u, 18446744073709551615u),
746 MakeUint128(6148914691236517205u, 6148914691236517205u),
747 MakeUint128(4611686018427387903u, 18446744073709551615u),
748 MakeUint128(3689348814741910323u, 3689348814741910323u),
749 MakeUint128(3074457345618258602u, 12297829382473034410u),
750 MakeUint128(2635249153387078802u, 5270498306774157604u),
751 MakeUint128(2305843009213693951u, 18446744073709551615u),
752 MakeUint128(2049638230412172401u, 14347467612885206812u),
753 MakeUint128(1844674407370955161u, 11068046444225730969u),
754 MakeUint128(1676976733973595601u, 8384883669867978007u),
755 MakeUint128(1537228672809129301u, 6148914691236517205u),
756 MakeUint128(1418980313362273201u, 4256940940086819603u),
757 MakeUint128(1317624576693539401u, 2635249153387078802u),
758 MakeUint128(1229782938247303441u, 1229782938247303441u),
759 MakeUint128(1152921504606846975u, 18446744073709551615u),
760 MakeUint128(1085102592571150095u, 1085102592571150095u),
761 MakeUint128(1024819115206086200u, 16397105843297379214u),
762 MakeUint128(970881267037344821u, 16504981539634861972u),
763 MakeUint128(922337203685477580u, 14757395258967641292u),
764 MakeUint128(878416384462359600u, 14054662151397753612u),
765 MakeUint128(838488366986797800u, 13415813871788764811u),
766 MakeUint128(802032351030850070u, 4812194106185100421u),
767 MakeUint128(768614336404564650u, 12297829382473034410u),
768 MakeUint128(737869762948382064u, 11805916207174113034u),
769 MakeUint128(709490156681136600u, 11351842506898185609u),
770 MakeUint128(683212743470724133u, 17080318586768103348u),
771 MakeUint128(658812288346769700u, 10540996613548315209u),
772 MakeUint128(636094623231363848u, 15266270957552732371u),
773 MakeUint128(614891469123651720u, 9838263505978427528u),
774 MakeUint128(595056260442243600u, 9520900167075897608u),
775 MakeUint128(576460752303423487u, 18446744073709551615u),
776 MakeUint128(558992244657865200u, 8943875914525843207u),
777 MakeUint128(542551296285575047u, 9765923333140350855u),
778 MakeUint128(527049830677415760u, 8432797290838652167u),
779 MakeUint128(512409557603043100u, 8198552921648689607u),
780 };
781
782 template <typename IntType>
783 const IntType LookupTables<IntType>::kVmaxOverBase[] =
784 X_OVER_BASE_INITIALIZER(std::numeric_limits<IntType>::max());
785
786 template <typename IntType>
787 const IntType LookupTables<IntType>::kVminOverBase[] =
788 X_OVER_BASE_INITIALIZER(std::numeric_limits<IntType>::min());
789
790 #undef X_OVER_BASE_INITIALIZER
791
792 template <typename IntType>
safe_parse_positive_int(absl::string_view text,int base,IntType * value_p)793 inline bool safe_parse_positive_int(absl::string_view text, int base,
794 IntType* value_p) {
795 IntType value = 0;
796 const IntType vmax = std::numeric_limits<IntType>::max();
797 assert(vmax > 0);
798 assert(base >= 0);
799 assert(vmax >= static_cast<IntType>(base));
800 const IntType vmax_over_base = LookupTables<IntType>::kVmaxOverBase[base];
801 assert(base < 2 ||
802 std::numeric_limits<IntType>::max() / base == vmax_over_base);
803 const char* start = text.data();
804 const char* end = start + text.size();
805 // loop over digits
806 for (; start < end; ++start) {
807 unsigned char c = static_cast<unsigned char>(start[0]);
808 int digit = kAsciiToInt[c];
809 if (digit >= base) {
810 *value_p = value;
811 return false;
812 }
813 if (value > vmax_over_base) {
814 *value_p = vmax;
815 return false;
816 }
817 value *= base;
818 if (value > vmax - digit) {
819 *value_p = vmax;
820 return false;
821 }
822 value += digit;
823 }
824 *value_p = value;
825 return true;
826 }
827
828 template <typename IntType>
safe_parse_negative_int(absl::string_view text,int base,IntType * value_p)829 inline bool safe_parse_negative_int(absl::string_view text, int base,
830 IntType* value_p) {
831 IntType value = 0;
832 const IntType vmin = std::numeric_limits<IntType>::min();
833 assert(vmin < 0);
834 assert(vmin <= 0 - base);
835 IntType vmin_over_base = LookupTables<IntType>::kVminOverBase[base];
836 assert(base < 2 ||
837 std::numeric_limits<IntType>::min() / base == vmin_over_base);
838 // 2003 c++ standard [expr.mul]
839 // "... the sign of the remainder is implementation-defined."
840 // Although (vmin/base)*base + vmin%base is always vmin.
841 // 2011 c++ standard tightens the spec but we cannot rely on it.
842 // TODO(junyer): Handle this in the lookup table generation.
843 if (vmin % base > 0) {
844 vmin_over_base += 1;
845 }
846 const char* start = text.data();
847 const char* end = start + text.size();
848 // loop over digits
849 for (; start < end; ++start) {
850 unsigned char c = static_cast<unsigned char>(start[0]);
851 int digit = kAsciiToInt[c];
852 if (digit >= base) {
853 *value_p = value;
854 return false;
855 }
856 if (value < vmin_over_base) {
857 *value_p = vmin;
858 return false;
859 }
860 value *= base;
861 if (value < vmin + digit) {
862 *value_p = vmin;
863 return false;
864 }
865 value -= digit;
866 }
867 *value_p = value;
868 return true;
869 }
870
871 // Input format based on POSIX.1-2008 strtol
872 // http://pubs.opengroup.org/onlinepubs/9699919799/functions/strtol.html
873 template <typename IntType>
safe_int_internal(absl::string_view text,IntType * value_p,int base)874 inline bool safe_int_internal(absl::string_view text, IntType* value_p,
875 int base) {
876 *value_p = 0;
877 bool negative;
878 if (!safe_parse_sign_and_base(&text, &base, &negative)) {
879 return false;
880 }
881 if (!negative) {
882 return safe_parse_positive_int(text, base, value_p);
883 } else {
884 return safe_parse_negative_int(text, base, value_p);
885 }
886 }
887
888 template <typename IntType>
safe_uint_internal(absl::string_view text,IntType * value_p,int base)889 inline bool safe_uint_internal(absl::string_view text, IntType* value_p,
890 int base) {
891 *value_p = 0;
892 bool negative;
893 if (!safe_parse_sign_and_base(&text, &base, &negative) || negative) {
894 return false;
895 }
896 return safe_parse_positive_int(text, base, value_p);
897 }
898 } // anonymous namespace
899
900 namespace numbers_internal {
901
902 // Digit conversion.
903 ABSL_CONST_INIT ABSL_DLL const char kHexChar[] =
904 "0123456789abcdef";
905
906 ABSL_CONST_INIT ABSL_DLL const char kHexTable[513] =
907 "000102030405060708090a0b0c0d0e0f"
908 "101112131415161718191a1b1c1d1e1f"
909 "202122232425262728292a2b2c2d2e2f"
910 "303132333435363738393a3b3c3d3e3f"
911 "404142434445464748494a4b4c4d4e4f"
912 "505152535455565758595a5b5c5d5e5f"
913 "606162636465666768696a6b6c6d6e6f"
914 "707172737475767778797a7b7c7d7e7f"
915 "808182838485868788898a8b8c8d8e8f"
916 "909192939495969798999a9b9c9d9e9f"
917 "a0a1a2a3a4a5a6a7a8a9aaabacadaeaf"
918 "b0b1b2b3b4b5b6b7b8b9babbbcbdbebf"
919 "c0c1c2c3c4c5c6c7c8c9cacbcccdcecf"
920 "d0d1d2d3d4d5d6d7d8d9dadbdcdddedf"
921 "e0e1e2e3e4e5e6e7e8e9eaebecedeeef"
922 "f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff";
923
924 ABSL_CONST_INIT ABSL_DLL const char two_ASCII_digits[100][2] = {
925 {'0', '0'}, {'0', '1'}, {'0', '2'}, {'0', '3'}, {'0', '4'}, {'0', '5'},
926 {'0', '6'}, {'0', '7'}, {'0', '8'}, {'0', '9'}, {'1', '0'}, {'1', '1'},
927 {'1', '2'}, {'1', '3'}, {'1', '4'}, {'1', '5'}, {'1', '6'}, {'1', '7'},
928 {'1', '8'}, {'1', '9'}, {'2', '0'}, {'2', '1'}, {'2', '2'}, {'2', '3'},
929 {'2', '4'}, {'2', '5'}, {'2', '6'}, {'2', '7'}, {'2', '8'}, {'2', '9'},
930 {'3', '0'}, {'3', '1'}, {'3', '2'}, {'3', '3'}, {'3', '4'}, {'3', '5'},
931 {'3', '6'}, {'3', '7'}, {'3', '8'}, {'3', '9'}, {'4', '0'}, {'4', '1'},
932 {'4', '2'}, {'4', '3'}, {'4', '4'}, {'4', '5'}, {'4', '6'}, {'4', '7'},
933 {'4', '8'}, {'4', '9'}, {'5', '0'}, {'5', '1'}, {'5', '2'}, {'5', '3'},
934 {'5', '4'}, {'5', '5'}, {'5', '6'}, {'5', '7'}, {'5', '8'}, {'5', '9'},
935 {'6', '0'}, {'6', '1'}, {'6', '2'}, {'6', '3'}, {'6', '4'}, {'6', '5'},
936 {'6', '6'}, {'6', '7'}, {'6', '8'}, {'6', '9'}, {'7', '0'}, {'7', '1'},
937 {'7', '2'}, {'7', '3'}, {'7', '4'}, {'7', '5'}, {'7', '6'}, {'7', '7'},
938 {'7', '8'}, {'7', '9'}, {'8', '0'}, {'8', '1'}, {'8', '2'}, {'8', '3'},
939 {'8', '4'}, {'8', '5'}, {'8', '6'}, {'8', '7'}, {'8', '8'}, {'8', '9'},
940 {'9', '0'}, {'9', '1'}, {'9', '2'}, {'9', '3'}, {'9', '4'}, {'9', '5'},
941 {'9', '6'}, {'9', '7'}, {'9', '8'}, {'9', '9'}};
942
safe_strto32_base(absl::string_view text,int32_t * value,int base)943 bool safe_strto32_base(absl::string_view text, int32_t* value, int base) {
944 return safe_int_internal<int32_t>(text, value, base);
945 }
946
safe_strto64_base(absl::string_view text,int64_t * value,int base)947 bool safe_strto64_base(absl::string_view text, int64_t* value, int base) {
948 return safe_int_internal<int64_t>(text, value, base);
949 }
950
safe_strtou32_base(absl::string_view text,uint32_t * value,int base)951 bool safe_strtou32_base(absl::string_view text, uint32_t* value, int base) {
952 return safe_uint_internal<uint32_t>(text, value, base);
953 }
954
safe_strtou64_base(absl::string_view text,uint64_t * value,int base)955 bool safe_strtou64_base(absl::string_view text, uint64_t* value, int base) {
956 return safe_uint_internal<uint64_t>(text, value, base);
957 }
958
safe_strtou128_base(absl::string_view text,uint128 * value,int base)959 bool safe_strtou128_base(absl::string_view text, uint128* value, int base) {
960 return safe_uint_internal<absl::uint128>(text, value, base);
961 }
962
963 } // namespace numbers_internal
964 ABSL_NAMESPACE_END
965 } // namespace absl
966