1 // Copyright 2012 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #ifdef UNSAFE_BUFFERS_BUILD
6 // TODO(crbug.com/40284755): Remove this and spanify to fix the errors.
7 #pragma allow_unsafe_buffers
8 #endif
9
10 #include "base/json/json_parser.h"
11
12 #include <cmath>
13 #include <iterator>
14 #include <string_view>
15 #include <utility>
16 #include <vector>
17
18 #include "base/check_op.h"
19 #include "base/feature_list.h"
20 #include "base/features.h"
21 #include "base/json/json_reader.h"
22 #include "base/metrics/histogram_functions.h"
23 #include "base/notreached.h"
24 #include "base/numerics/safe_conversions.h"
25 #include "base/ranges/algorithm.h"
26 #include "base/strings/string_number_conversions.h"
27 #include "base/strings/string_util.h"
28 #include "base/strings/stringprintf.h"
29 #include "base/strings/utf_string_conversion_utils.h"
30 #include "base/strings/utf_string_conversions.h"
31 #include "base/third_party/icu/icu_utf.h"
32
33 namespace base {
34 namespace internal {
35
36 namespace {
37
// Values 1000 and above are used by JSONFileValueSerializer::JsonFileError, so
// every JSONParser error code has to stay below that range.
static_assert(JSONParser::JSON_PARSE_ERROR_COUNT < 1000,
              "JSONParser error out of bounds");
41
ErrorCodeToString(JSONParser::JsonParseError error_code)42 std::string ErrorCodeToString(JSONParser::JsonParseError error_code) {
43 switch (error_code) {
44 case JSONParser::JSON_NO_ERROR:
45 return std::string();
46 case JSONParser::JSON_SYNTAX_ERROR:
47 return JSONParser::kSyntaxError;
48 case JSONParser::JSON_INVALID_ESCAPE:
49 return JSONParser::kInvalidEscape;
50 case JSONParser::JSON_UNEXPECTED_TOKEN:
51 return JSONParser::kUnexpectedToken;
52 case JSONParser::JSON_TRAILING_COMMA:
53 return JSONParser::kTrailingComma;
54 case JSONParser::JSON_TOO_MUCH_NESTING:
55 return JSONParser::kTooMuchNesting;
56 case JSONParser::JSON_UNEXPECTED_DATA_AFTER_ROOT:
57 return JSONParser::kUnexpectedDataAfterRoot;
58 case JSONParser::JSON_UNSUPPORTED_ENCODING:
59 return JSONParser::kUnsupportedEncoding;
60 case JSONParser::JSON_UNQUOTED_DICTIONARY_KEY:
61 return JSONParser::kUnquotedDictionaryKey;
62 case JSONParser::JSON_UNREPRESENTABLE_NUMBER:
63 return JSONParser::kUnrepresentableNumber;
64 case JSONParser::JSON_PARSE_ERROR_COUNT:
65 NOTREACHED();
66 }
67 NOTREACHED();
68 }
69
70 const int32_t kExtendedASCIIStart = 0x80;
71 constexpr base_icu::UChar32 kUnicodeReplacementPoint = 0xFFFD;
72
73 // UnprefixedHexStringToInt acts like |HexStringToInt|, but enforces that the
74 // input consists purely of hex digits. I.e. no "0x" nor "OX" prefix is
75 // permitted.
UnprefixedHexStringToInt(std::string_view input,int * output)76 bool UnprefixedHexStringToInt(std::string_view input, int* output) {
77 for (size_t i = 0; i < input.size(); i++) {
78 if (!IsHexDigit(input[i])) {
79 return false;
80 }
81 }
82 return HexStringToInt(input, output);
83 }
84
// Non-standard JSON syntax accepted (or at least recognized) by this parser.
// One sample is recorded in the histogram each time an extension is seen.
//
// These values are persisted to logs. Entries should not be renumbered and
// numeric values should never be reused.
enum class ChromiumJsonExtension {
  kCComment = 0,
  kCppComment = 1,
  kXEscape = 2,
  kVerticalTabEscape = 3,
  kControlCharacter = 4,
  kNewlineInString = 5,
  kMaxValue = kNewlineInString,
};
96
// Enumerated histogram that receives one ChromiumJsonExtension sample each
// time the parser encounters a non-standard construct.
constexpr char kExtensionHistogramName[] =
    "Security.JSONParser.ChromiumExtensionUsage";
99
100 } // namespace
101
// UTF-8 encoding of U+FFFD. Appended to parsed string output in place of an
// invalid UTF-8 sequence when replacement is enabled.
const char kUnicodeReplacementString[] = "\xEF\xBF\xBD";

// Human-readable descriptions, one per JsonParseError value other than
// JSON_NO_ERROR. ErrorCodeToString() selects among them.
const char JSONParser::kSyntaxError[] = "Syntax error.";
const char JSONParser::kInvalidEscape[] = "Invalid escape sequence.";
const char JSONParser::kUnexpectedToken[] = "Unexpected token.";
const char JSONParser::kTrailingComma[] = "Trailing comma not allowed.";
const char JSONParser::kTooMuchNesting[] = "Too much nesting.";
const char JSONParser::kUnexpectedDataAfterRoot[] =
    "Unexpected data after root element.";
const char JSONParser::kUnsupportedEncoding[] =
    "Unsupported encoding. JSON must be UTF-8.";
const char JSONParser::kUnquotedDictionaryKey[] =
    "Dictionary keys must be quoted.";
const char JSONParser::kUnrepresentableNumber[] =
    "Number cannot be represented.";
118
JSONParser(int options,size_t max_depth)119 JSONParser::JSONParser(int options, size_t max_depth)
120 : options_(options),
121 max_depth_(max_depth),
122 index_(0),
123 stack_depth_(0),
124 line_number_(0),
125 index_last_line_(0),
126 error_code_(JSON_NO_ERROR),
127 error_line_(0),
128 error_column_(0) {
129 CHECK_LE(max_depth, kAbsoluteMaxDepth);
130 }
131
// Defaulted: the destructor has no hand-written cleanup.
JSONParser::~JSONParser() = default;
133
Parse(std::string_view input)134 std::optional<Value> JSONParser::Parse(std::string_view input) {
135 input_ = input;
136 index_ = 0;
137 // Line and column counting is 1-based, but |index_| is 0-based. For example,
138 // if input is "Aaa\nB" then 'A' and 'B' are both in column 1 (at lines 1 and
139 // 2) and have indexes of 0 and 4. We track the line number explicitly (the
140 // |line_number_| field) and the column number implicitly (the difference
141 // between |index_| and |index_last_line_|). In calculating that difference,
142 // |index_last_line_| is the index of the '\r' or '\n', not the index of the
143 // first byte after the '\n'. For the 'B' in "Aaa\nB", its |index_| and
144 // |index_last_line_| would be 4 and 3: 'B' is in column (4 - 3) = 1. We
145 // initialize |index_last_line_| to -1, not 0, since -1 is the (out of range)
146 // index of the imaginary '\n' immediately before the start of the string:
147 // 'A' is in column (0 - -1) = 1.
148 line_number_ = 1;
149 index_last_line_ = static_cast<size_t>(-1);
150
151 error_code_ = JSON_NO_ERROR;
152 error_line_ = 0;
153 error_column_ = 0;
154
155 // When the input JSON string starts with a UTF-8 Byte-Order-Mark,
156 // advance the start position to avoid the ParseNextToken function mis-
157 // treating a Unicode BOM as an invalid character and returning NULL.
158 ConsumeIfMatch("\xEF\xBB\xBF");
159
160 // Parse the first and any nested tokens.
161 std::optional<Value> root(ParseNextToken());
162 if (!root)
163 return std::nullopt;
164
165 // Make sure the input stream is at an end.
166 if (GetNextToken() != T_END_OF_INPUT) {
167 ReportError(JSON_UNEXPECTED_DATA_AFTER_ROOT, 0);
168 return std::nullopt;
169 }
170
171 return root;
172 }
173
// Returns the code stored by the most recent ReportError() call, or
// JSON_NO_ERROR if none has been made since Parse() last reset the state.
JSONParser::JsonParseError JSONParser::error_code() const {
  return error_code_;
}
177
GetErrorMessage() const178 std::string JSONParser::GetErrorMessage() const {
179 return FormatErrorMessage(error_line_, error_column_,
180 ErrorCodeToString(error_code_));
181 }
182
// Returns the 1-based line recorded by the most recent ReportError() call, or
// 0 if no error has been reported since Parse() last reset the state.
int JSONParser::error_line() const {
  return error_line_;
}
186
// Returns the 1-based column recorded by the most recent ReportError() call,
// or 0 if no error has been reported since Parse() last reset the state.
int JSONParser::error_column() const {
  return error_column_;
}
190
191 // JSONParser private //////////////////////////////////////////////////////////
192
PeekChars(size_t count)193 std::optional<std::string_view> JSONParser::PeekChars(size_t count) {
194 if (count > input_.length() - index_) {
195 return std::nullopt;
196 }
197 // Using string_view::substr() was historically significantly slower
198 // (according to base_perftests) than constructing a substring manually.
199 //
200 // TODO(crbug.com/40284755): Is this still the case? Ideally the bounds check
201 // performed by substr would be deleted by the optimizer for being redundant
202 // with the runtime check above. However, to do so, the compiler would need
203 // to know `index_ <= input_.length()` is a class invariant. If we
204 // restructured the code so that we only stored the remaining data, that
205 // would avoid this, but it would prevent rewinding (the places in this file
206 // which look at `input_[index_ - 1]`.)
207 return std::string_view(input_.data() + index_, count);
208 }
209
PeekChar()210 std::optional<char> JSONParser::PeekChar() {
211 std::optional<std::string_view> chars = PeekChars(1);
212 if (chars)
213 return (*chars)[0];
214 return std::nullopt;
215 }
216
ConsumeChars(size_t count)217 std::optional<std::string_view> JSONParser::ConsumeChars(size_t count) {
218 std::optional<std::string_view> chars = PeekChars(count);
219 if (chars)
220 index_ += count;
221 return chars;
222 }
223
ConsumeChar()224 std::optional<char> JSONParser::ConsumeChar() {
225 std::optional<std::string_view> chars = ConsumeChars(1);
226 if (chars)
227 return (*chars)[0];
228 return std::nullopt;
229 }
230
pos()231 const char* JSONParser::pos() {
232 CHECK_LE(index_, input_.length());
233 return input_.data() + index_;
234 }
235
GetNextToken()236 JSONParser::Token JSONParser::GetNextToken() {
237 EatWhitespaceAndComments();
238
239 std::optional<char> c = PeekChar();
240 if (!c)
241 return T_END_OF_INPUT;
242
243 switch (*c) {
244 case '{':
245 return T_OBJECT_BEGIN;
246 case '}':
247 return T_OBJECT_END;
248 case '[':
249 return T_ARRAY_BEGIN;
250 case ']':
251 return T_ARRAY_END;
252 case '"':
253 return T_STRING;
254 case '0':
255 case '1':
256 case '2':
257 case '3':
258 case '4':
259 case '5':
260 case '6':
261 case '7':
262 case '8':
263 case '9':
264 case '-':
265 return T_NUMBER;
266 case 't':
267 return T_BOOL_TRUE;
268 case 'f':
269 return T_BOOL_FALSE;
270 case 'n':
271 return T_NULL;
272 case ',':
273 return T_LIST_SEPARATOR;
274 case ':':
275 return T_OBJECT_PAIR_SEPARATOR;
276 default:
277 return T_INVALID_TOKEN;
278 }
279 }
280
EatWhitespaceAndComments()281 void JSONParser::EatWhitespaceAndComments() {
282 while (std::optional<char> c = PeekChar()) {
283 switch (*c) {
284 case '\r':
285 case '\n':
286 index_last_line_ = index_;
287 // Don't increment line_number_ twice for "\r\n".
288 if (!(c == '\n' && index_ > 0 && input_[index_ - 1] == '\r')) {
289 ++line_number_;
290 }
291 [[fallthrough]];
292 case ' ':
293 case '\t':
294 ConsumeChar();
295 break;
296 case '/':
297 if (!EatComment())
298 return;
299 break;
300 default:
301 return;
302 }
303 }
304 }
305
EatComment()306 bool JSONParser::EatComment() {
307 std::optional<std::string_view> comment_start = PeekChars(2);
308 if (!comment_start)
309 return false;
310
311 const bool comments_allowed = options_ & JSON_ALLOW_COMMENTS;
312
313 if (comment_start == "//") {
314 UmaHistogramEnumeration(kExtensionHistogramName,
315 ChromiumJsonExtension::kCppComment);
316 if (!comments_allowed) {
317 ReportError(JSON_UNEXPECTED_TOKEN, 0);
318 return false;
319 }
320
321 ConsumeChars(2);
322 // Single line comment, read to newline.
323 while (std::optional<char> c = PeekChar()) {
324 if (c == '\n' || c == '\r')
325 return true;
326 ConsumeChar();
327 }
328 } else if (comment_start == "/*") {
329 UmaHistogramEnumeration(kExtensionHistogramName,
330 ChromiumJsonExtension::kCComment);
331 if (!comments_allowed) {
332 ReportError(JSON_UNEXPECTED_TOKEN, 0);
333 return false;
334 }
335
336 ConsumeChars(2);
337 char previous_char = '\0';
338 // Block comment, read until end marker.
339 while (std::optional<char> c = PeekChar()) {
340 if (previous_char == '*' && c == '/') {
341 // EatWhitespaceAndComments will inspect pos(), which will still be on
342 // the last / of the comment, so advance once more (which may also be
343 // end of input).
344 ConsumeChar();
345 return true;
346 }
347 previous_char = *ConsumeChar();
348 }
349
350 // If the comment is unterminated, GetNextToken will report T_END_OF_INPUT.
351 }
352
353 return false;
354 }
355
ParseNextToken()356 std::optional<Value> JSONParser::ParseNextToken() {
357 return ParseToken(GetNextToken());
358 }
359
ParseToken(Token token)360 std::optional<Value> JSONParser::ParseToken(Token token) {
361 switch (token) {
362 case T_OBJECT_BEGIN:
363 return ConsumeDictionary();
364 case T_ARRAY_BEGIN:
365 return ConsumeList();
366 case T_STRING:
367 return ConsumeString();
368 case T_NUMBER:
369 return ConsumeNumber();
370 case T_BOOL_TRUE:
371 case T_BOOL_FALSE:
372 case T_NULL:
373 return ConsumeLiteral();
374 default:
375 ReportError(JSON_UNEXPECTED_TOKEN, 0);
376 return std::nullopt;
377 }
378 }
379
ConsumeDictionary()380 std::optional<Value> JSONParser::ConsumeDictionary() {
381 if (ConsumeChar() != '{') {
382 ReportError(JSON_UNEXPECTED_TOKEN, 0);
383 return std::nullopt;
384 }
385
386 StackMarker depth_check(max_depth_, &stack_depth_);
387 if (depth_check.IsTooDeep()) {
388 ReportError(JSON_TOO_MUCH_NESTING, -1);
389 return std::nullopt;
390 }
391
392 std::vector<std::pair<std::string, Value>> values;
393
394 Token token = GetNextToken();
395 while (token != T_OBJECT_END) {
396 if (token != T_STRING) {
397 ReportError(JSON_UNQUOTED_DICTIONARY_KEY, 0);
398 return std::nullopt;
399 }
400
401 // First consume the key.
402 std::optional<std::string> key = ConsumeStringRaw();
403 if (!key) {
404 return std::nullopt;
405 }
406
407 // Read the separator.
408 token = GetNextToken();
409 if (token != T_OBJECT_PAIR_SEPARATOR) {
410 ReportError(JSON_SYNTAX_ERROR, 0);
411 return std::nullopt;
412 }
413
414 // The next token is the value. Ownership transfers to |dict|.
415 ConsumeChar();
416 std::optional<Value> value = ParseNextToken();
417 if (!value) {
418 // ReportError from deeper level.
419 return std::nullopt;
420 }
421
422 values.emplace_back(std::move(*key), std::move(*value));
423
424 token = GetNextToken();
425 if (token == T_LIST_SEPARATOR) {
426 ConsumeChar();
427 token = GetNextToken();
428 if (token == T_OBJECT_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
429 ReportError(JSON_TRAILING_COMMA, 0);
430 return std::nullopt;
431 }
432 } else if (token != T_OBJECT_END) {
433 ReportError(JSON_SYNTAX_ERROR, 0);
434 return std::nullopt;
435 }
436 }
437
438 ConsumeChar(); // Closing '}'.
439 // Reverse |dict_storage| to keep the last of elements with the same key in
440 // the input.
441 ranges::reverse(values);
442 return Value(Value::Dict(std::make_move_iterator(values.begin()),
443 std::make_move_iterator(values.end())));
444 }
445
ConsumeList()446 std::optional<Value> JSONParser::ConsumeList() {
447 if (ConsumeChar() != '[') {
448 ReportError(JSON_UNEXPECTED_TOKEN, 0);
449 return std::nullopt;
450 }
451
452 StackMarker depth_check(max_depth_, &stack_depth_);
453 if (depth_check.IsTooDeep()) {
454 ReportError(JSON_TOO_MUCH_NESTING, -1);
455 return std::nullopt;
456 }
457
458 Value::List list;
459
460 Token token = GetNextToken();
461 while (token != T_ARRAY_END) {
462 std::optional<Value> item = ParseToken(token);
463 if (!item) {
464 // ReportError from deeper level.
465 return std::nullopt;
466 }
467
468 list.Append(std::move(*item));
469
470 token = GetNextToken();
471 if (token == T_LIST_SEPARATOR) {
472 ConsumeChar();
473 token = GetNextToken();
474 if (token == T_ARRAY_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
475 ReportError(JSON_TRAILING_COMMA, 0);
476 return std::nullopt;
477 }
478 } else if (token != T_ARRAY_END) {
479 ReportError(JSON_SYNTAX_ERROR, 0);
480 return std::nullopt;
481 }
482 }
483
484 ConsumeChar(); // Closing ']'.
485
486 return Value(std::move(list));
487 }
488
ConsumeString()489 std::optional<Value> JSONParser::ConsumeString() {
490 std::optional<std::string> string = ConsumeStringRaw();
491 if (!string) {
492 return std::nullopt;
493 }
494 return Value(std::move(*string));
495 }
496
ConsumeStringRaw()497 std::optional<std::string> JSONParser::ConsumeStringRaw() {
498 if (ConsumeChar() != '"') {
499 ReportError(JSON_UNEXPECTED_TOKEN, 0);
500 return std::nullopt;
501 }
502
503 std::string string;
504 for (;;) {
505 auto [result, consumed] = ConsumeStringPart();
506 switch (result) {
507 case StringResult::kError:
508 return std::nullopt;
509
510 case StringResult::kDone:
511 // This is the last time we're appending, so pre-reserve the desired
512 // size, to prevent `+=` from overallocating. (In other cases, the
513 // overallocating is desirable for amortization.) In particular,
514 // the common case is that `string` is empty and we return in one step.
515 string.reserve(string.size() + consumed.size());
516 string += consumed;
517 return std::move(string);
518
519 case StringResult::kReplacementCharacter:
520 string += consumed;
521 string += kUnicodeReplacementString;
522 break; // Keep parsing.
523
524 case StringResult::kEscape:
525 string += consumed;
526 std::optional<char> escape_char = ConsumeChar();
527 if (!escape_char) {
528 ReportError(JSON_INVALID_ESCAPE, -1);
529 return std::nullopt;
530 }
531
532 switch (*escape_char) {
533 // Allowed esape sequences:
534 case 'x': { // UTF-8 sequence.
535 // UTF-8 \x escape sequences are not allowed in the spec, but they
536 // are supported here for backwards-compatiblity with the old
537 // parser.
538 UmaHistogramEnumeration(kExtensionHistogramName,
539 ChromiumJsonExtension::kXEscape);
540 if (!(options_ & JSON_ALLOW_X_ESCAPES)) {
541 ReportError(JSON_INVALID_ESCAPE, -1);
542 return std::nullopt;
543 }
544
545 std::optional<std::string_view> escape_sequence = ConsumeChars(2);
546 if (!escape_sequence) {
547 ReportError(JSON_INVALID_ESCAPE, -3);
548 return std::nullopt;
549 }
550
551 int hex_digit = 0;
552 if (!UnprefixedHexStringToInt(*escape_sequence, &hex_digit)) {
553 ReportError(JSON_INVALID_ESCAPE, -3);
554 return std::nullopt;
555 }
556
557 // A two-character hex sequence is at most 0xff and all codepoints
558 // up to 0xff are valid.
559 DCHECK_LE(hex_digit, 0xff);
560 DCHECK(IsValidCharacter(hex_digit));
561 WriteUnicodeCharacter(hex_digit, &string);
562 break;
563 }
564 case 'u': { // UTF-16 sequence.
565 // UTF units are of the form \uXXXX.
566 base_icu::UChar32 code_point;
567 if (!DecodeUTF16(&code_point)) {
568 ReportError(JSON_INVALID_ESCAPE, -1);
569 return std::nullopt;
570 }
571 WriteUnicodeCharacter(code_point, &string);
572 break;
573 }
574 case '"':
575 string.push_back('"');
576 break;
577 case '\\':
578 string.push_back('\\');
579 break;
580 case '/':
581 string.push_back('/');
582 break;
583 case 'b':
584 string.push_back('\b');
585 break;
586 case 'f':
587 string.push_back('\f');
588 break;
589 case 'n':
590 string.push_back('\n');
591 break;
592 case 'r':
593 string.push_back('\r');
594 break;
595 case 't':
596 string.push_back('\t');
597 break;
598 case 'v': // Not listed as valid escape sequence in the RFC.
599 UmaHistogramEnumeration(kExtensionHistogramName,
600 ChromiumJsonExtension::kVerticalTabEscape);
601 if (!(options_ & JSON_ALLOW_VERT_TAB)) {
602 ReportError(JSON_INVALID_ESCAPE, -1);
603 return std::nullopt;
604 }
605 string.push_back('\v');
606 break;
607 // All other escape squences are illegal.
608 default:
609 ReportError(JSON_INVALID_ESCAPE, -1);
610 return std::nullopt;
611 }
612 break; // Keep parsing.
613 }
614 }
615 }
616
617 std::pair<JSONParser::StringResult, std::string_view>
ConsumeStringPart()618 JSONParser::ConsumeStringPart() {
619 const size_t start_index = index_;
620 while (std::optional<char> c = PeekChar()) {
621 // Handle non-ASCII characters, which never trigger any special handling
622 // beyond needing to be valid UTF-8. ASCII characters will be handled
623 // separately below.
624 if (static_cast<unsigned char>(*c) >= kExtendedASCIIStart) {
625 base_icu::UChar32 next_char = 0;
626 size_t last_index = index_;
627 if (!ReadUnicodeCharacter(input_.data(), input_.length(), &index_,
628 &next_char)) {
629 if ((options_ & JSON_REPLACE_INVALID_CHARACTERS) == 0) {
630 ReportError(JSON_UNSUPPORTED_ENCODING, 0);
631 // No need to return consumed data.
632 return {StringResult::kError, {}};
633 }
634 ConsumeChar();
635 return {StringResult::kReplacementCharacter,
636 input_.substr(start_index, last_index - start_index)};
637 }
638
639 // Valid UTF-8 will be copied as-is into the output, so keep processing.
640 DCHECK_GE(next_char, kExtendedASCIIStart);
641 ConsumeChar();
642 continue;
643 }
644
645 if (*c == '"') {
646 std::string_view ret = input_.substr(start_index, index_ - start_index);
647 ConsumeChar();
648 return {StringResult::kDone, ret};
649 }
650 if (*c == '\\') {
651 std::string_view ret = input_.substr(start_index, index_ - start_index);
652 ConsumeChar();
653 return {StringResult::kEscape, ret};
654 }
655
656 // Per Section 7, "All Unicode characters may be placed within the
657 // quotation marks, except for the characters that MUST be escaped:
658 // quotation mark, reverse solidus, and the control characters (U+0000
659 // through U+001F)".
660 if (*c == '\n' || *c == '\r') {
661 UmaHistogramEnumeration(kExtensionHistogramName,
662 ChromiumJsonExtension::kNewlineInString);
663 if (!(options_ &
664 (JSON_ALLOW_NEWLINES_IN_STRINGS | JSON_ALLOW_CONTROL_CHARS))) {
665 ReportError(JSON_UNSUPPORTED_ENCODING, -1);
666 return {StringResult::kError, {}}; // No need to return consumed data.
667 }
668 } else if (*c <= 0x1F) {
669 UmaHistogramEnumeration(kExtensionHistogramName,
670 ChromiumJsonExtension::kControlCharacter);
671 if (!(options_ & JSON_ALLOW_CONTROL_CHARS)) {
672 ReportError(JSON_UNSUPPORTED_ENCODING, -1);
673 return {StringResult::kError, {}}; // No need to return consumed data.
674 }
675 }
676
677 // If this character is not an escape sequence, track any line breaks and
678 // keep parsing. The JSON spec forbids unescaped ASCII control characters
679 // within a string, including '\r' and '\n', but this implementation is more
680 // lenient.
681 if (*c == '\r' || *c == '\n') {
682 index_last_line_ = index_;
683 // Don't increment line_number_ twice for "\r\n". We are guaranteed that
684 // (index_ > 0) because we are consuming a string, so we must have seen an
685 // opening '"' quote character.
686 if ((*c == '\r') || (input_[index_ - 1] != '\r')) {
687 ++line_number_;
688 }
689 }
690 ConsumeChar();
691 }
692
693 ReportError(JSON_SYNTAX_ERROR, -1);
694 return {StringResult::kError, {}}; // No need to return consumed data.
695 }
696
697 // Entry is at the first X in \uXXXX.
DecodeUTF16(base_icu::UChar32 * out_code_point)698 bool JSONParser::DecodeUTF16(base_icu::UChar32* out_code_point) {
699 std::optional<std::string_view> escape_sequence = ConsumeChars(4);
700 if (!escape_sequence)
701 return false;
702
703 // Consume the UTF-16 code unit, which may be a high surrogate.
704 int code_unit16_high = 0;
705 if (!UnprefixedHexStringToInt(*escape_sequence, &code_unit16_high))
706 return false;
707
708 // If this is a high surrogate, consume the next code unit to get the
709 // low surrogate.
710 if (CBU16_IS_SURROGATE(code_unit16_high)) {
711 // Make sure this is the high surrogate.
712 if (!CBU16_IS_SURROGATE_LEAD(code_unit16_high)) {
713 if ((options_ & JSON_REPLACE_INVALID_CHARACTERS) == 0)
714 return false;
715 *out_code_point = kUnicodeReplacementPoint;
716 return true;
717 }
718
719 // Make sure that the token has more characters to consume the
720 // lower surrogate.
721 if (!ConsumeIfMatch("\\u")) {
722 if ((options_ & JSON_REPLACE_INVALID_CHARACTERS) == 0)
723 return false;
724 *out_code_point = kUnicodeReplacementPoint;
725 return true;
726 }
727
728 escape_sequence = ConsumeChars(4);
729 if (!escape_sequence)
730 return false;
731
732 int code_unit16_low = 0;
733 if (!UnprefixedHexStringToInt(*escape_sequence, &code_unit16_low))
734 return false;
735
736 if (!CBU16_IS_TRAIL(code_unit16_low)) {
737 if ((options_ & JSON_REPLACE_INVALID_CHARACTERS) == 0)
738 return false;
739 *out_code_point = kUnicodeReplacementPoint;
740 return true;
741 }
742
743 base_icu::UChar32 code_point =
744 CBU16_GET_SUPPLEMENTARY(code_unit16_high, code_unit16_low);
745
746 *out_code_point = code_point;
747 } else {
748 // Not a surrogate.
749 DCHECK(CBU16_IS_SINGLE(code_unit16_high));
750
751 *out_code_point = code_unit16_high;
752 }
753
754 return true;
755 }
756
ConsumeNumber()757 std::optional<Value> JSONParser::ConsumeNumber() {
758 const char* num_start = pos();
759 const size_t start_index = index_;
760 size_t end_index = start_index;
761
762 if (PeekChar() == '-')
763 ConsumeChar();
764
765 if (!ReadInt(false)) {
766 ReportError(JSON_SYNTAX_ERROR, 0);
767 return std::nullopt;
768 }
769 end_index = index_;
770
771 // The optional fraction part.
772 if (PeekChar() == '.') {
773 ConsumeChar();
774 if (!ReadInt(true)) {
775 ReportError(JSON_SYNTAX_ERROR, 0);
776 return std::nullopt;
777 }
778 end_index = index_;
779 }
780
781 // Optional exponent part.
782 std::optional<char> c = PeekChar();
783 if (c == 'e' || c == 'E') {
784 ConsumeChar();
785 if (PeekChar() == '-' || PeekChar() == '+') {
786 ConsumeChar();
787 }
788 if (!ReadInt(true)) {
789 ReportError(JSON_SYNTAX_ERROR, 0);
790 return std::nullopt;
791 }
792 end_index = index_;
793 }
794
795 std::string_view num_string(num_start, end_index - start_index);
796
797 int num_int;
798 if (StringToInt(num_string, &num_int)) {
799 // StringToInt will treat `-0` as zero, losing the significance of the
800 // negation.
801 if (num_int == 0 && num_string.starts_with('-')) {
802 return Value(-0.0);
803 }
804 return Value(num_int);
805 }
806
807 double num_double;
808 if (StringToDouble(num_string, &num_double) && std::isfinite(num_double)) {
809 return Value(num_double);
810 }
811
812 ReportError(JSON_UNREPRESENTABLE_NUMBER, 0);
813 return std::nullopt;
814 }
815
ReadInt(bool allow_leading_zeros)816 bool JSONParser::ReadInt(bool allow_leading_zeros) {
817 size_t len = 0;
818 char first = 0;
819
820 while (std::optional<char> c = PeekChar()) {
821 if (!IsAsciiDigit(*c)) {
822 break;
823 }
824
825 if (len == 0)
826 first = *c;
827
828 ++len;
829 ConsumeChar();
830 }
831
832 if (len == 0)
833 return false;
834
835 if (!allow_leading_zeros && len > 1 && first == '0')
836 return false;
837
838 return true;
839 }
840
ConsumeLiteral()841 std::optional<Value> JSONParser::ConsumeLiteral() {
842 if (ConsumeIfMatch("true"))
843 return Value(true);
844 if (ConsumeIfMatch("false"))
845 return Value(false);
846 if (ConsumeIfMatch("null"))
847 return Value(Value::Type::NONE);
848 ReportError(JSON_SYNTAX_ERROR, 0);
849 return std::nullopt;
850 }
851
ConsumeIfMatch(std::string_view match)852 bool JSONParser::ConsumeIfMatch(std::string_view match) {
853 if (match == PeekChars(match.size())) {
854 ConsumeChars(match.size());
855 return true;
856 }
857 return false;
858 }
859
ReportError(JsonParseError code,int column_adjust)860 void JSONParser::ReportError(JsonParseError code, int column_adjust) {
861 error_code_ = code;
862 error_line_ = line_number_;
863 error_column_ = static_cast<int>(index_ - index_last_line_) + column_adjust;
864
865 // For a final blank line ('\n' and then EOF), a negative column_adjust may
866 // put us below 1, which doesn't really make sense for 1-based columns.
867 if (error_column_ < 1) {
868 error_column_ = 1;
869 }
870 }
871
872 // static
FormatErrorMessage(int line,int column,const std::string & description)873 std::string JSONParser::FormatErrorMessage(int line, int column,
874 const std::string& description) {
875 if (line || column) {
876 return StringPrintf("Line: %i, column: %i, %s",
877 line, column, description.c_str());
878 }
879 return description;
880 }
881
882 } // namespace internal
883 } // namespace base
884