1 // Copyright 2018 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "absl/strings/internal/charconv_parse.h"
16
17 #include <string>
18 #include <utility>
19
20 #include "gmock/gmock.h"
21 #include "gtest/gtest.h"
22 #include "absl/base/internal/raw_logging.h"
23 #include "absl/strings/str_cat.h"
24
25 using absl::chars_format;
26 using absl::strings_internal::FloatType;
27 using absl::strings_internal::ParsedFloat;
28 using absl::strings_internal::ParseFloat;
29
30 namespace {
31
32 // Check that a given string input is parsed to the expected mantissa and
33 // exponent.
34 //
35 // Input string `s` must contain a '$' character. It marks the end of the
36 // characters that should be consumed by the match. It is stripped from the
37 // input to ParseFloat.
38 //
39 // If input string `s` contains '[' and ']' characters, these mark the region
40 // of characters that should be marked as the "subrange". For NaNs, this is
41 // the location of the extended NaN string. For numbers, this is the location
42 // of the full, over-large mantissa.
43 template <int base>
ExpectParsedFloat(std::string s,absl::chars_format format_flags,FloatType expected_type,uint64_t expected_mantissa,int expected_exponent,int expected_literal_exponent=-999)44 void ExpectParsedFloat(std::string s, absl::chars_format format_flags,
45 FloatType expected_type, uint64_t expected_mantissa,
46 int expected_exponent,
47 int expected_literal_exponent = -999) {
48 SCOPED_TRACE(s);
49
50 int begin_subrange = -1;
51 int end_subrange = -1;
52 // If s contains '[' and ']', then strip these characters and set the subrange
53 // indices appropriately.
54 std::string::size_type open_bracket_pos = s.find('[');
55 if (open_bracket_pos != std::string::npos) {
56 begin_subrange = static_cast<int>(open_bracket_pos);
57 s.replace(open_bracket_pos, 1, "");
58 std::string::size_type close_bracket_pos = s.find(']');
59 ABSL_RAW_CHECK(close_bracket_pos != absl::string_view::npos,
60 "Test input contains [ without matching ]");
61 end_subrange = static_cast<int>(close_bracket_pos);
62 s.replace(close_bracket_pos, 1, "");
63 }
64 const std::string::size_type expected_characters_matched = s.find('$');
65 ABSL_RAW_CHECK(expected_characters_matched != std::string::npos,
66 "Input string must contain $");
67 s.replace(expected_characters_matched, 1, "");
68
69 ParsedFloat parsed =
70 ParseFloat<base>(s.data(), s.data() + s.size(), format_flags);
71
72 EXPECT_NE(parsed.end, nullptr);
73 if (parsed.end == nullptr) {
74 return; // The following tests are not useful if we fully failed to parse
75 }
76 EXPECT_EQ(parsed.type, expected_type);
77 if (begin_subrange == -1) {
78 EXPECT_EQ(parsed.subrange_begin, nullptr);
79 EXPECT_EQ(parsed.subrange_end, nullptr);
80 } else {
81 EXPECT_EQ(parsed.subrange_begin, s.data() + begin_subrange);
82 EXPECT_EQ(parsed.subrange_end, s.data() + end_subrange);
83 }
84 if (parsed.type == FloatType::kNumber) {
85 EXPECT_EQ(parsed.mantissa, expected_mantissa);
86 EXPECT_EQ(parsed.exponent, expected_exponent);
87 if (expected_literal_exponent != -999) {
88 EXPECT_EQ(parsed.literal_exponent, expected_literal_exponent);
89 }
90 }
91 auto characters_matched = static_cast<int>(parsed.end - s.data());
92 EXPECT_EQ(characters_matched, expected_characters_matched);
93 }
94
95 // Check that a given string input is parsed to the expected mantissa and
96 // exponent.
97 //
98 // Input string `s` must contain a '$' character. It marks the end of the
99 // characters that were consumed by the match.
100 template <int base>
ExpectNumber(std::string s,absl::chars_format format_flags,uint64_t expected_mantissa,int expected_exponent,int expected_literal_exponent=-999)101 void ExpectNumber(std::string s, absl::chars_format format_flags,
102 uint64_t expected_mantissa, int expected_exponent,
103 int expected_literal_exponent = -999) {
104 ExpectParsedFloat<base>(std::move(s), format_flags, FloatType::kNumber,
105 expected_mantissa, expected_exponent,
106 expected_literal_exponent);
107 }
108
109 // Check that a given string input is parsed to the given special value.
110 //
111 // This tests against both number bases, since infinities and NaNs have
112 // identical representations in both modes.
ExpectSpecial(const std::string & s,absl::chars_format format_flags,FloatType type)113 void ExpectSpecial(const std::string& s, absl::chars_format format_flags,
114 FloatType type) {
115 ExpectParsedFloat<10>(s, format_flags, type, 0, 0);
116 ExpectParsedFloat<16>(s, format_flags, type, 0, 0);
117 }
118
119 // Check that a given input string is not matched by Float.
120 template <int base>
ExpectFailedParse(absl::string_view s,absl::chars_format format_flags)121 void ExpectFailedParse(absl::string_view s, absl::chars_format format_flags) {
122 ParsedFloat parsed =
123 ParseFloat<base>(s.data(), s.data() + s.size(), format_flags);
124 EXPECT_EQ(parsed.end, nullptr);
125 }
126
TEST(ParseFloat, SimpleValue) {
  constexpr chars_format kGeneral = chars_format::general;
  constexpr uint64_t kDecimalMantissa = 123456789;
  constexpr uint64_t kHexMantissa = 0x1234abcdef;

  // Various spellings of the same floating point number must all parse to the
  // same mantissa/exponent pair.
  for (const char* input :
       {"1.23456789e5$", "1.23456789e+5$", "1.23456789E5$", "1.23456789e05$",
        "123.456789e3$", "0.000123456789e9$", "123456.789$",
        "123456789e-3$"}) {
    ExpectNumber<10>(input, kGeneral, kDecimalMantissa, -3);
  }
  for (const char* input :
       {"1.234abcdefp28$", "1.234abcdefp+28$", "1.234ABCDEFp28$",
        "1.234AbCdEfP0028$", "123.4abcdefp20$", "0.0001234abcdefp44$",
        "1234abcd.ef$", "1234abcdefp-8$"}) {
    ExpectNumber<16>(input, kGeneral, kHexMantissa, -8);
  }

  // Trailing zeroes are expected to stay in the mantissa rather than being
  // dropped (leading zeroes do not show up in it).
  ExpectNumber<10>("0001.2345678900e005$", kGeneral, 12345678900, -5);
  ExpectNumber<16>("0001.234abcdef000p28$", kGeneral, 0x1234abcdef000, -20);

  // Ensure non-matching characters after a number are ignored, even when they
  // look like potentially matching characters.
  for (const char* input :
       {"1.23456789e5$ ", "1.23456789e5$e5e5", "1.23456789e5$.25",
        "1.23456789e5$-", "1.23456789e5$PUPPERS!!!", "123456.789$efghij",
        "123456.789$e", "123456.789$p5", "123456.789$.10"}) {
    ExpectNumber<10>(input, kGeneral, kDecimalMantissa, -3);
  }
  for (const char* input :
       {"1.234abcdefp28$ ", "1.234abcdefp28$p28", "1.234abcdefp28$.125",
        "1.234abcdefp28$-", "1.234abcdefp28$KITTEHS!!!", "1234abcd.ef$ghijk",
        "1234abcd.ef$p", "1234abcd.ef$.10"}) {
    ExpectNumber<16>(input, kGeneral, kHexMantissa, -8);
  }

  // Ensure we can read a full resolution mantissa without overflow.
  ExpectNumber<10>("9999999999999999999$", kGeneral, 9999999999999999999u, 0);
  ExpectNumber<16>("fffffffffffffff$", kGeneral, 0xfffffffffffffffu, 0);

  // Check that zero is consistently read, in both bases.
  for (const char* input :
       {"0$", "000000000000000000000000000000000000000$",
        "0000000000000000000000.000000000000000000$"}) {
    ExpectNumber<10>(input, kGeneral, 0, 0);
    ExpectNumber<16>(input, kGeneral, 0, 0);
  }
  // A zero mantissa with a large explicit exponent still reads as (0, 0).
  ExpectNumber<10>("0.00000000000000000000000000000000e123456$", kGeneral, 0,
                   0);
  ExpectNumber<16>("0.00000000000000000000000000000000p123456$", kGeneral, 0,
                   0);
}
204
TEST(ParseFloat, LargeDecimalMantissa) {
  // After 19 significant decimal digits in the mantissa, ParsedFloat will
  // truncate additional digits. We need to test that:
  //   1) the truncation to 19 digits happens
  //   2) the returned exponent reflects the dropped significant digits
  //   3) a correct literal_exponent is set
  //
  // If and only if a significant (nonzero) digit is found after 19 digits, the
  // entirety of the mantissa is marked as a subrange, in case the exact value
  // is needed to make a rounding decision. The [ and ] characters below denote
  // where such a subrange is expected to be marked by ParseFloat. They are not
  // part of the input.
  constexpr chars_format kGeneral = chars_format::general;
  // The 19 digits retained from most of the inputs below.
  constexpr uint64_t kRetainedDigits = 1234567891234567891;

  // When every dropped digit is zero, no subrange is marked.
  ExpectNumber<10>("100000000000000000000000000$", kGeneral,
                   1000000000000000000,
                   /* adjusted exponent */ 8);
  ExpectNumber<10>("123456789123456789100000000$", kGeneral, kRetainedDigits,
                   /* adjusted exponent */ 8);

  // A nonzero dropped digit marks the whole mantissa. The last input checks
  // that leading zeroes do not count towards the 19 significant digit limit.
  for (const char* input :
       {"[123456789123456789123456789]$", "[123456789123456789100000009]$",
        "[123456789123456789120000000]$",
        "[00000000123456789123456789123456789]$"}) {
    ExpectNumber<10>(input, kGeneral, kRetainedDigits,
                     /* adjusted exponent */ 8,
                     /* literal exponent */ 0);
  }

  // Leading zeroes again, this time with only zero digits dropped.
  ExpectNumber<10>("00000000123456789123456789100000000$", kGeneral,
                   kRetainedDigits,
                   /* adjusted exponent */ 8);

  // Truncated digits after the decimal point should not cause a further
  // exponent adjustment.
  ExpectNumber<10>("1.234567891234567891e123$", kGeneral, kRetainedDigits,
                   105);
  ExpectNumber<10>("[1.23456789123456789123456789]e123$", kGeneral,
                   kRetainedDigits,
                   /* adjusted exponent */ 105,
                   /* literal exponent */ 123);

  // Ensure we truncate, and not round. (The from_chars algorithm we use
  // depends on our guess missing low, if it misses, so we need the rounding
  // error to be downward.)
  ExpectNumber<10>("[1999999999999999999999]$", kGeneral, 1999999999999999999,
                   /* adjusted exponent */ 3,
                   /* literal exponent */ 0);
}
268
TEST(ParseFloat, LargeHexadecimalMantissa) {
  // After 15 significant hex digits in the mantissa, ParsedFloat will treat
  // additional digits as sticky. We need to test that:
  //   1) The truncation to 15 digits happens
  //   2) The returned exponent reflects the dropped significant digits
  //   3) If a nonzero digit is dropped, the low bit of mantissa is set.
  constexpr chars_format kGeneral = chars_format::general;
  constexpr uint64_t kOddMantissa = 0x123456789abcdef;
  constexpr uint64_t kEvenMantissa = 0x123456789abcdee;

  // Plain truncation. The second input checks that leading zeroes do not
  // count towards the 15 significant digit limit.
  for (const char* input : {"123456789abcdef123456789abcdef$",
                            "000000123456789abcdef123456789abcdef$"}) {
    ExpectNumber<16>(input, kGeneral, kOddMantissa, 60);
  }

  // Truncated digits after the radix point should not cause a further
  // exponent adjustment.
  for (const char* input : {"1.23456789abcdefp100$",
                            "1.23456789abcdef123456789abcdefp100$"}) {
    ExpectNumber<16>(input, kGeneral, kOddMantissa, 44);
  }

  // Sticky digit behavior: the low bit should be set iff any dropped digit is
  // nonzero. The retained digits end in 'e' here, so a set low bit turns the
  // final digit into 'f'.
  for (const char* input : {"123456789abcdee123456789abcdee$",
                            "123456789abcdee000000000000001$"}) {
    ExpectNumber<16>(input, kGeneral, kOddMantissa, 60);
  }
  ExpectNumber<16>("123456789abcdee000000000000000$", kGeneral, kEvenMantissa,
                   60);
}
299
TEST(ParseFloat, ScientificVsFixed) {
  // In fixed mode, an exponent is never matched (but the remainder of the
  // number will be matched.)
  ExpectNumber<10>("1.23456789$e5", chars_format::fixed, 123456789, -8);
  ExpectNumber<10>("123456.789$", chars_format::fixed, 123456789, -3);
  ExpectNumber<16>("1.234abcdef$p28", chars_format::fixed, 0x1234abcdef, -36);
  ExpectNumber<16>("1234abcd.ef$", chars_format::fixed, 0x1234abcdef, -8);

  // In scientific mode, numbers don't match *unless* they have an exponent.
  //
  // The rejected inputs must be otherwise well-formed, unsigned numbers, so
  // that the missing exponent is the only possible reason for the failure.
  // NOTE(review): ParseFloat appears not to consume a leading sign, in which
  // case a '-' prefix would make the decimal case fail regardless of the
  // format flags -- confirm against charconv_parse.h.
  ExpectNumber<10>("1.23456789e5$", chars_format::scientific, 123456789, -3);
  ExpectFailedParse<10>("123456.789$", chars_format::scientific);
  ExpectNumber<16>("1.234abcdefp28$", chars_format::scientific, 0x1234abcdef,
                   -8);
  ExpectFailedParse<16>("1234abcd.ef$", chars_format::scientific);
}
315
TEST(ParseFloat, Infinity) {
  constexpr chars_format kGeneral = chars_format::general;

  // Incomplete or misspelled prefixes of "inf" must not match in either base.
  for (const char* input : {"in", "inx"}) {
    ExpectFailedParse<10>(input, kGeneral);
    ExpectFailedParse<16>(input, kGeneral);
  }

  // "inf" and "infinity" match in any letter case. The '$' marks where the
  // match is expected to stop.
  for (const char* input : {"inf$", "Inf$", "INF$", "inf$inite", "iNfInItY$",
                            "infinity$!!!"}) {
    ExpectSpecial(input, kGeneral, FloatType::kInfinity);
  }
}
328
TEST(ParseFloat, NaN) {
  constexpr chars_format kGeneral = chars_format::general;

  // Incomplete or misspelled prefixes of "nan" must not match in either base.
  for (const char* input : {"na", "nah"}) {
    ExpectFailedParse<10>(input, kGeneral);
    ExpectFailedParse<16>(input, kGeneral);
  }

  // A bare "nan" matches in any letter case, and trailing text is left alone.
  for (const char* input :
       {"nan$", "NaN$", "nAn$", "NAN$", "NaN$aNaNaNaNaBatman!"}) {
    ExpectSpecial(input, kGeneral, FloatType::kNan);
  }

  // A parenthesized sequence of the characters [a-zA-Z0-9_] is allowed to
  // appear after an NaN. Check that this is allowed, and that the correct
  // characters are grouped.
  //
  // (The characters [ and ] in the patterns below delimit the expected matched
  // subgroup; they are not part of the input passed to ParseFloat.)
  for (const char* input :
       {"nan([0xabcdef])$", "nan([0xabcdef])$...", "nan([0xabcdef])$)...",
        "nan([])$", "nan([aAzZ09_])$"}) {
    ExpectSpecial(input, kGeneral, FloatType::kNan);
  }

  // If the subgroup contains illegal characters, don't match it at all.
  ExpectSpecial("nan$(bad-char)", kGeneral, FloatType::kNan);
  // Also cope with a missing close paren.
  ExpectSpecial("nan$(0xabcdef", kGeneral, FloatType::kNan);
}
356
357 } // namespace
358