1 // Copyright 2012 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/json/json_parser.h"
6
7 #include <cmath>
8 #include <iterator>
9 #include <utility>
10 #include <vector>
11
12 #include "base/check_op.h"
13 #include "base/feature_list.h"
14 #include "base/features.h"
15 #include "base/json/json_reader.h"
16 #include "base/metrics/histogram_functions.h"
17 #include "base/notreached.h"
18 #include "base/numerics/safe_conversions.h"
19 #include "base/ranges/algorithm.h"
20 #include "base/strings/string_number_conversions.h"
21 #include "base/strings/string_piece.h"
22 #include "base/strings/string_util.h"
23 #include "base/strings/stringprintf.h"
24 #include "base/strings/utf_string_conversion_utils.h"
25 #include "base/strings/utf_string_conversions.h"
26 #include "base/third_party/icu/icu_utf.h"
27 #include "third_party/abseil-cpp/absl/types/optional.h"
28
29 namespace base {
30 namespace internal {
31
32 namespace {
33
34 // Values 1000 and above are used by JSONFileValueSerializer::JsonFileError.
35 static_assert(JSONParser::JSON_PARSE_ERROR_COUNT < 1000,
36 "JSONParser error out of bounds");
37
ErrorCodeToString(JSONParser::JsonParseError error_code)38 std::string ErrorCodeToString(JSONParser::JsonParseError error_code) {
39 switch (error_code) {
40 case JSONParser::JSON_NO_ERROR:
41 return std::string();
42 case JSONParser::JSON_SYNTAX_ERROR:
43 return JSONParser::kSyntaxError;
44 case JSONParser::JSON_INVALID_ESCAPE:
45 return JSONParser::kInvalidEscape;
46 case JSONParser::JSON_UNEXPECTED_TOKEN:
47 return JSONParser::kUnexpectedToken;
48 case JSONParser::JSON_TRAILING_COMMA:
49 return JSONParser::kTrailingComma;
50 case JSONParser::JSON_TOO_MUCH_NESTING:
51 return JSONParser::kTooMuchNesting;
52 case JSONParser::JSON_UNEXPECTED_DATA_AFTER_ROOT:
53 return JSONParser::kUnexpectedDataAfterRoot;
54 case JSONParser::JSON_UNSUPPORTED_ENCODING:
55 return JSONParser::kUnsupportedEncoding;
56 case JSONParser::JSON_UNQUOTED_DICTIONARY_KEY:
57 return JSONParser::kUnquotedDictionaryKey;
58 case JSONParser::JSON_UNREPRESENTABLE_NUMBER:
59 return JSONParser::kUnrepresentableNumber;
60 case JSONParser::JSON_PARSE_ERROR_COUNT:
61 break;
62 }
63 NOTREACHED();
64 return std::string();
65 }
66
67 const int32_t kExtendedASCIIStart = 0x80;
68 constexpr base_icu::UChar32 kUnicodeReplacementPoint = 0xFFFD;
69
70 // UnprefixedHexStringToInt acts like |HexStringToInt|, but enforces that the
71 // input consists purely of hex digits. I.e. no "0x" nor "OX" prefix is
72 // permitted.
UnprefixedHexStringToInt(StringPiece input,int * output)73 bool UnprefixedHexStringToInt(StringPiece input, int* output) {
74 for (size_t i = 0; i < input.size(); i++) {
75 if (!IsHexDigit(input[i])) {
76 return false;
77 }
78 }
79 return HexStringToInt(input, output);
80 }
81
// Non-standard JSON syntax accepted by this parser, recorded in the
// |kExtensionHistogramName| histogram whenever it is encountered.
//
// These values are persisted to logs. Entries should not be renumbered and
// numeric values should never be reused.
enum class ChromiumJsonExtension {
  kCComment = 0,
  kCppComment = 1,
  kXEscape = 2,
  kVerticalTabEscape = 3,
  kControlCharacter = 4,
  kMaxValue = kControlCharacter,
};
92
// Histogram that receives a ChromiumJsonExtension sample each time the parser
// sees one of the non-standard extensions.
constexpr char kExtensionHistogramName[] =
    "Security.JSONParser.ChromiumExtensionUsage";
95
96 } // namespace
97
// U+FFFD (the Unicode replacement character) encoded as UTF-8.
const char kUnicodeReplacementString[] = "\xEF\xBF\xBD";
100
101 const char JSONParser::kSyntaxError[] = "Syntax error.";
102 const char JSONParser::kInvalidEscape[] = "Invalid escape sequence.";
103 const char JSONParser::kUnexpectedToken[] = "Unexpected token.";
104 const char JSONParser::kTrailingComma[] = "Trailing comma not allowed.";
105 const char JSONParser::kTooMuchNesting[] = "Too much nesting.";
106 const char JSONParser::kUnexpectedDataAfterRoot[] =
107 "Unexpected data after root element.";
108 const char JSONParser::kUnsupportedEncoding[] =
109 "Unsupported encoding. JSON must be UTF-8.";
110 const char JSONParser::kUnquotedDictionaryKey[] =
111 "Dictionary keys must be quoted.";
112 const char JSONParser::kUnrepresentableNumber[] =
113 "Number cannot be represented.";
114
JSONParser(int options,size_t max_depth)115 JSONParser::JSONParser(int options, size_t max_depth)
116 : options_(options),
117 max_depth_(max_depth),
118 index_(0),
119 stack_depth_(0),
120 line_number_(0),
121 index_last_line_(0),
122 error_code_(JSON_NO_ERROR),
123 error_line_(0),
124 error_column_(0) {
125 CHECK_LE(max_depth, kAbsoluteMaxDepth);
126 }
127
128 JSONParser::~JSONParser() = default;
129
Parse(StringPiece input)130 absl::optional<Value> JSONParser::Parse(StringPiece input) {
131 input_ = input;
132 index_ = 0;
133 // Line and column counting is 1-based, but |index_| is 0-based. For example,
134 // if input is "Aaa\nB" then 'A' and 'B' are both in column 1 (at lines 1 and
135 // 2) and have indexes of 0 and 4. We track the line number explicitly (the
136 // |line_number_| field) and the column number implicitly (the difference
137 // between |index_| and |index_last_line_|). In calculating that difference,
138 // |index_last_line_| is the index of the '\r' or '\n', not the index of the
139 // first byte after the '\n'. For the 'B' in "Aaa\nB", its |index_| and
140 // |index_last_line_| would be 4 and 3: 'B' is in column (4 - 3) = 1. We
141 // initialize |index_last_line_| to -1, not 0, since -1 is the (out of range)
142 // index of the imaginary '\n' immediately before the start of the string:
143 // 'A' is in column (0 - -1) = 1.
144 line_number_ = 1;
145 index_last_line_ = static_cast<size_t>(-1);
146
147 error_code_ = JSON_NO_ERROR;
148 error_line_ = 0;
149 error_column_ = 0;
150
151 // When the input JSON string starts with a UTF-8 Byte-Order-Mark,
152 // advance the start position to avoid the ParseNextToken function mis-
153 // treating a Unicode BOM as an invalid character and returning NULL.
154 ConsumeIfMatch("\xEF\xBB\xBF");
155
156 // Parse the first and any nested tokens.
157 absl::optional<Value> root(ParseNextToken());
158 if (!root)
159 return absl::nullopt;
160
161 // Make sure the input stream is at an end.
162 if (GetNextToken() != T_END_OF_INPUT) {
163 ReportError(JSON_UNEXPECTED_DATA_AFTER_ROOT, 0);
164 return absl::nullopt;
165 }
166
167 return root;
168 }
169
error_code() const170 JSONParser::JsonParseError JSONParser::error_code() const {
171 return error_code_;
172 }
173
GetErrorMessage() const174 std::string JSONParser::GetErrorMessage() const {
175 return FormatErrorMessage(error_line_, error_column_,
176 ErrorCodeToString(error_code_));
177 }
178
error_line() const179 int JSONParser::error_line() const {
180 return error_line_;
181 }
182
error_column() const183 int JSONParser::error_column() const {
184 return error_column_;
185 }
186
// StringBuilder ///////////////////////////////////////////////////////////////
188
StringBuilder()189 JSONParser::StringBuilder::StringBuilder() : StringBuilder(nullptr) {}
190
StringBuilder(const char * pos)191 JSONParser::StringBuilder::StringBuilder(const char* pos)
192 : pos_(pos), length_(0) {}
193
194 JSONParser::StringBuilder::~StringBuilder() = default;
195
196 JSONParser::StringBuilder& JSONParser::StringBuilder::operator=(
197 StringBuilder&& other) = default;
198
Append(base_icu::UChar32 point)199 void JSONParser::StringBuilder::Append(base_icu::UChar32 point) {
200 DCHECK(IsValidCodepoint(point));
201
202 if (point < kExtendedASCIIStart) {
203 if (!string_) {
204 DCHECK_EQ(static_cast<char>(point), pos_[length_]);
205 ++length_;
206 } else {
207 string_->push_back(static_cast<char>(point));
208 }
209 } else {
210 Convert();
211 if (UNLIKELY(point == kUnicodeReplacementPoint)) {
212 string_->append(kUnicodeReplacementString);
213 } else {
214 WriteUnicodeCharacter(point, &*string_);
215 }
216 }
217 }
218
Convert()219 void JSONParser::StringBuilder::Convert() {
220 if (string_)
221 return;
222 string_.emplace(pos_, length_);
223 }
224
DestructiveAsString()225 std::string JSONParser::StringBuilder::DestructiveAsString() {
226 if (string_)
227 return std::move(*string_);
228 return std::string(pos_, length_);
229 }
230
// JSONParser private //////////////////////////////////////////////////////////
232
PeekChars(size_t count)233 absl::optional<StringPiece> JSONParser::PeekChars(size_t count) {
234 if (index_ + count > input_.length())
235 return absl::nullopt;
236 // Using StringPiece::substr() is significantly slower (according to
237 // base_perftests) than constructing a substring manually.
238 return StringPiece(input_.data() + index_, count);
239 }
240
PeekChar()241 absl::optional<char> JSONParser::PeekChar() {
242 absl::optional<StringPiece> chars = PeekChars(1);
243 if (chars)
244 return (*chars)[0];
245 return absl::nullopt;
246 }
247
ConsumeChars(size_t count)248 absl::optional<StringPiece> JSONParser::ConsumeChars(size_t count) {
249 absl::optional<StringPiece> chars = PeekChars(count);
250 if (chars)
251 index_ += count;
252 return chars;
253 }
254
ConsumeChar()255 absl::optional<char> JSONParser::ConsumeChar() {
256 absl::optional<StringPiece> chars = ConsumeChars(1);
257 if (chars)
258 return (*chars)[0];
259 return absl::nullopt;
260 }
261
pos()262 const char* JSONParser::pos() {
263 CHECK_LE(index_, input_.length());
264 return input_.data() + index_;
265 }
266
GetNextToken()267 JSONParser::Token JSONParser::GetNextToken() {
268 EatWhitespaceAndComments();
269
270 absl::optional<char> c = PeekChar();
271 if (!c)
272 return T_END_OF_INPUT;
273
274 switch (*c) {
275 case '{':
276 return T_OBJECT_BEGIN;
277 case '}':
278 return T_OBJECT_END;
279 case '[':
280 return T_ARRAY_BEGIN;
281 case ']':
282 return T_ARRAY_END;
283 case '"':
284 return T_STRING;
285 case '0':
286 case '1':
287 case '2':
288 case '3':
289 case '4':
290 case '5':
291 case '6':
292 case '7':
293 case '8':
294 case '9':
295 case '-':
296 return T_NUMBER;
297 case 't':
298 return T_BOOL_TRUE;
299 case 'f':
300 return T_BOOL_FALSE;
301 case 'n':
302 return T_NULL;
303 case ',':
304 return T_LIST_SEPARATOR;
305 case ':':
306 return T_OBJECT_PAIR_SEPARATOR;
307 default:
308 return T_INVALID_TOKEN;
309 }
310 }
311
EatWhitespaceAndComments()312 void JSONParser::EatWhitespaceAndComments() {
313 while (absl::optional<char> c = PeekChar()) {
314 switch (*c) {
315 case '\r':
316 case '\n':
317 index_last_line_ = index_;
318 // Don't increment line_number_ twice for "\r\n".
319 if (!(c == '\n' && index_ > 0 && input_[index_ - 1] == '\r')) {
320 ++line_number_;
321 }
322 [[fallthrough]];
323 case ' ':
324 case '\t':
325 ConsumeChar();
326 break;
327 case '/':
328 if (!EatComment())
329 return;
330 break;
331 default:
332 return;
333 }
334 }
335 }
336
EatComment()337 bool JSONParser::EatComment() {
338 absl::optional<StringPiece> comment_start = PeekChars(2);
339 if (!comment_start)
340 return false;
341
342 const bool comments_allowed = options_ & JSON_ALLOW_COMMENTS;
343
344 if (comment_start == "//") {
345 UmaHistogramEnumeration(kExtensionHistogramName,
346 ChromiumJsonExtension::kCppComment);
347 if (!comments_allowed) {
348 ReportError(JSON_UNEXPECTED_TOKEN, 0);
349 return false;
350 }
351
352 ConsumeChars(2);
353 // Single line comment, read to newline.
354 while (absl::optional<char> c = PeekChar()) {
355 if (c == '\n' || c == '\r')
356 return true;
357 ConsumeChar();
358 }
359 } else if (comment_start == "/*") {
360 UmaHistogramEnumeration(kExtensionHistogramName,
361 ChromiumJsonExtension::kCComment);
362 if (!comments_allowed) {
363 ReportError(JSON_UNEXPECTED_TOKEN, 0);
364 return false;
365 }
366
367 ConsumeChars(2);
368 char previous_char = '\0';
369 // Block comment, read until end marker.
370 while (absl::optional<char> c = PeekChar()) {
371 if (previous_char == '*' && c == '/') {
372 // EatWhitespaceAndComments will inspect pos(), which will still be on
373 // the last / of the comment, so advance once more (which may also be
374 // end of input).
375 ConsumeChar();
376 return true;
377 }
378 previous_char = *ConsumeChar();
379 }
380
381 // If the comment is unterminated, GetNextToken will report T_END_OF_INPUT.
382 }
383
384 return false;
385 }
386
ParseNextToken()387 absl::optional<Value> JSONParser::ParseNextToken() {
388 return ParseToken(GetNextToken());
389 }
390
ParseToken(Token token)391 absl::optional<Value> JSONParser::ParseToken(Token token) {
392 switch (token) {
393 case T_OBJECT_BEGIN:
394 return ConsumeDictionary();
395 case T_ARRAY_BEGIN:
396 return ConsumeList();
397 case T_STRING:
398 return ConsumeString();
399 case T_NUMBER:
400 return ConsumeNumber();
401 case T_BOOL_TRUE:
402 case T_BOOL_FALSE:
403 case T_NULL:
404 return ConsumeLiteral();
405 default:
406 ReportError(JSON_UNEXPECTED_TOKEN, 0);
407 return absl::nullopt;
408 }
409 }
410
ConsumeDictionary()411 absl::optional<Value> JSONParser::ConsumeDictionary() {
412 if (ConsumeChar() != '{') {
413 ReportError(JSON_UNEXPECTED_TOKEN, 0);
414 return absl::nullopt;
415 }
416
417 StackMarker depth_check(max_depth_, &stack_depth_);
418 if (depth_check.IsTooDeep()) {
419 ReportError(JSON_TOO_MUCH_NESTING, -1);
420 return absl::nullopt;
421 }
422
423 std::vector<std::pair<std::string, Value>> values;
424
425 Token token = GetNextToken();
426 while (token != T_OBJECT_END) {
427 if (token != T_STRING) {
428 ReportError(JSON_UNQUOTED_DICTIONARY_KEY, 0);
429 return absl::nullopt;
430 }
431
432 // First consume the key.
433 StringBuilder key;
434 if (!ConsumeStringRaw(&key)) {
435 return absl::nullopt;
436 }
437
438 // Read the separator.
439 token = GetNextToken();
440 if (token != T_OBJECT_PAIR_SEPARATOR) {
441 ReportError(JSON_SYNTAX_ERROR, 0);
442 return absl::nullopt;
443 }
444
445 // The next token is the value. Ownership transfers to |dict|.
446 ConsumeChar();
447 absl::optional<Value> value = ParseNextToken();
448 if (!value) {
449 // ReportError from deeper level.
450 return absl::nullopt;
451 }
452
453 values.emplace_back(key.DestructiveAsString(), std::move(*value));
454
455 token = GetNextToken();
456 if (token == T_LIST_SEPARATOR) {
457 ConsumeChar();
458 token = GetNextToken();
459 if (token == T_OBJECT_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
460 ReportError(JSON_TRAILING_COMMA, 0);
461 return absl::nullopt;
462 }
463 } else if (token != T_OBJECT_END) {
464 ReportError(JSON_SYNTAX_ERROR, 0);
465 return absl::nullopt;
466 }
467 }
468
469 ConsumeChar(); // Closing '}'.
470 // Reverse |dict_storage| to keep the last of elements with the same key in
471 // the input.
472 ranges::reverse(values);
473 return Value(Value::Dict(std::make_move_iterator(values.begin()),
474 std::make_move_iterator(values.end())));
475 }
476
ConsumeList()477 absl::optional<Value> JSONParser::ConsumeList() {
478 if (ConsumeChar() != '[') {
479 ReportError(JSON_UNEXPECTED_TOKEN, 0);
480 return absl::nullopt;
481 }
482
483 StackMarker depth_check(max_depth_, &stack_depth_);
484 if (depth_check.IsTooDeep()) {
485 ReportError(JSON_TOO_MUCH_NESTING, -1);
486 return absl::nullopt;
487 }
488
489 Value::List list;
490
491 Token token = GetNextToken();
492 while (token != T_ARRAY_END) {
493 absl::optional<Value> item = ParseToken(token);
494 if (!item) {
495 // ReportError from deeper level.
496 return absl::nullopt;
497 }
498
499 list.Append(std::move(*item));
500
501 token = GetNextToken();
502 if (token == T_LIST_SEPARATOR) {
503 ConsumeChar();
504 token = GetNextToken();
505 if (token == T_ARRAY_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
506 ReportError(JSON_TRAILING_COMMA, 0);
507 return absl::nullopt;
508 }
509 } else if (token != T_ARRAY_END) {
510 ReportError(JSON_SYNTAX_ERROR, 0);
511 return absl::nullopt;
512 }
513 }
514
515 ConsumeChar(); // Closing ']'.
516
517 return Value(std::move(list));
518 }
519
ConsumeString()520 absl::optional<Value> JSONParser::ConsumeString() {
521 StringBuilder string;
522 if (!ConsumeStringRaw(&string))
523 return absl::nullopt;
524 return Value(string.DestructiveAsString());
525 }
526
ConsumeStringRaw(StringBuilder * out)527 bool JSONParser::ConsumeStringRaw(StringBuilder* out) {
528 if (ConsumeChar() != '"') {
529 ReportError(JSON_UNEXPECTED_TOKEN, 0);
530 return false;
531 }
532
533 // StringBuilder will internally build a StringPiece unless a UTF-16
534 // conversion occurs, at which point it will perform a copy into a
535 // std::string.
536 StringBuilder string(pos());
537
538 while (absl::optional<char> c = PeekChar()) {
539 base_icu::UChar32 next_char = 0;
540 if (static_cast<unsigned char>(*c) < kExtendedASCIIStart) {
541 // Fast path for ASCII.
542 next_char = *c;
543 } else if (!ReadUnicodeCharacter(input_.data(), input_.length(), &index_,
544 &next_char) ||
545 !IsValidCodepoint(next_char)) {
546 if ((options_ & JSON_REPLACE_INVALID_CHARACTERS) == 0) {
547 ReportError(JSON_UNSUPPORTED_ENCODING, 0);
548 return false;
549 }
550 ConsumeChar();
551 string.Append(kUnicodeReplacementPoint);
552 continue;
553 }
554
555 if (next_char == '"') {
556 ConsumeChar();
557 *out = std::move(string);
558 return true;
559 }
560 if (next_char != '\\') {
561 // Per Section 7, "All Unicode characters may be placed within the
562 // quotation marks, except for the characters that MUST be escaped:
563 // quotation mark, reverse solidus, and the control characters (U+0000
564 // through U+001F)".
565 if (next_char <= 0x1F) {
566 UmaHistogramEnumeration(kExtensionHistogramName,
567 ChromiumJsonExtension::kControlCharacter);
568 if (!(options_ & JSON_ALLOW_CONTROL_CHARS)) {
569 ReportError(JSON_UNSUPPORTED_ENCODING, -1);
570 return false;
571 }
572 }
573
574 // If this character is not an escape sequence, track any line breaks and
575 // copy next_char to the StringBuilder. The JSON spec forbids unescaped
576 // ASCII control characters within a string, including '\r' and '\n', but
577 // this implementation is more lenient.
578 if ((next_char == '\r') || (next_char == '\n')) {
579 index_last_line_ = index_;
580 // Don't increment line_number_ twice for "\r\n". We are guaranteed
581 // that (index_ > 0) because we are consuming a string, so we must have
582 // seen an opening '"' quote character.
583 if ((next_char == '\r') || (input_[index_ - 1] != '\r')) {
584 ++line_number_;
585 }
586 }
587 ConsumeChar();
588 string.Append(next_char);
589 } else {
590 // And if it is an escape sequence, the input string will be adjusted
591 // (either by combining the two characters of an encoded escape sequence,
592 // or with a UTF conversion), so using StringPiece isn't possible -- force
593 // a conversion.
594 string.Convert();
595
596 // Read past the escape '\' and ensure there's a character following.
597 absl::optional<StringPiece> escape_sequence = ConsumeChars(2);
598 if (!escape_sequence) {
599 ReportError(JSON_INVALID_ESCAPE, -1);
600 return false;
601 }
602
603 switch ((*escape_sequence)[1]) {
604 // Allowed esape sequences:
605 case 'x': { // UTF-8 sequence.
606 // UTF-8 \x escape sequences are not allowed in the spec, but they
607 // are supported here for backwards-compatiblity with the old parser.
608 UmaHistogramEnumeration(kExtensionHistogramName,
609 ChromiumJsonExtension::kXEscape);
610 if (!(options_ & JSON_ALLOW_X_ESCAPES)) {
611 ReportError(JSON_INVALID_ESCAPE, -1);
612 return false;
613 }
614
615 escape_sequence = ConsumeChars(2);
616 if (!escape_sequence) {
617 ReportError(JSON_INVALID_ESCAPE, -3);
618 return false;
619 }
620
621 int hex_digit = 0;
622 if (!UnprefixedHexStringToInt(*escape_sequence, &hex_digit) ||
623 !IsValidCharacter(hex_digit)) {
624 ReportError(JSON_INVALID_ESCAPE, -3);
625 return false;
626 }
627
628 string.Append(hex_digit);
629 break;
630 }
631 case 'u': { // UTF-16 sequence.
632 // UTF units are of the form \uXXXX.
633 base_icu::UChar32 code_point;
634 if (!DecodeUTF16(&code_point)) {
635 ReportError(JSON_INVALID_ESCAPE, -1);
636 return false;
637 }
638 string.Append(code_point);
639 break;
640 }
641 case '"':
642 string.Append('"');
643 break;
644 case '\\':
645 string.Append('\\');
646 break;
647 case '/':
648 string.Append('/');
649 break;
650 case 'b':
651 string.Append('\b');
652 break;
653 case 'f':
654 string.Append('\f');
655 break;
656 case 'n':
657 string.Append('\n');
658 break;
659 case 'r':
660 string.Append('\r');
661 break;
662 case 't':
663 string.Append('\t');
664 break;
665 case 'v': // Not listed as valid escape sequence in the RFC.
666 UmaHistogramEnumeration(kExtensionHistogramName,
667 ChromiumJsonExtension::kVerticalTabEscape);
668 if (!(options_ & JSON_ALLOW_VERT_TAB)) {
669 ReportError(JSON_INVALID_ESCAPE, -1);
670 return false;
671 }
672 string.Append('\v');
673 break;
674 // All other escape squences are illegal.
675 default:
676 ReportError(JSON_INVALID_ESCAPE, -1);
677 return false;
678 }
679 }
680 }
681
682 ReportError(JSON_SYNTAX_ERROR, -1);
683 return false;
684 }
685
686 // Entry is at the first X in \uXXXX.
DecodeUTF16(base_icu::UChar32 * out_code_point)687 bool JSONParser::DecodeUTF16(base_icu::UChar32* out_code_point) {
688 absl::optional<StringPiece> escape_sequence = ConsumeChars(4);
689 if (!escape_sequence)
690 return false;
691
692 // Consume the UTF-16 code unit, which may be a high surrogate.
693 int code_unit16_high = 0;
694 if (!UnprefixedHexStringToInt(*escape_sequence, &code_unit16_high))
695 return false;
696
697 // If this is a high surrogate, consume the next code unit to get the
698 // low surrogate.
699 if (CBU16_IS_SURROGATE(code_unit16_high)) {
700 // Make sure this is the high surrogate.
701 if (!CBU16_IS_SURROGATE_LEAD(code_unit16_high)) {
702 if ((options_ & JSON_REPLACE_INVALID_CHARACTERS) == 0)
703 return false;
704 *out_code_point = kUnicodeReplacementPoint;
705 return true;
706 }
707
708 // Make sure that the token has more characters to consume the
709 // lower surrogate.
710 if (!ConsumeIfMatch("\\u")) {
711 if ((options_ & JSON_REPLACE_INVALID_CHARACTERS) == 0)
712 return false;
713 *out_code_point = kUnicodeReplacementPoint;
714 return true;
715 }
716
717 escape_sequence = ConsumeChars(4);
718 if (!escape_sequence)
719 return false;
720
721 int code_unit16_low = 0;
722 if (!UnprefixedHexStringToInt(*escape_sequence, &code_unit16_low))
723 return false;
724
725 if (!CBU16_IS_TRAIL(code_unit16_low)) {
726 if ((options_ & JSON_REPLACE_INVALID_CHARACTERS) == 0)
727 return false;
728 *out_code_point = kUnicodeReplacementPoint;
729 return true;
730 }
731
732 base_icu::UChar32 code_point =
733 CBU16_GET_SUPPLEMENTARY(code_unit16_high, code_unit16_low);
734
735 *out_code_point = code_point;
736 } else {
737 // Not a surrogate.
738 DCHECK(CBU16_IS_SINGLE(code_unit16_high));
739
740 *out_code_point = code_unit16_high;
741 }
742
743 return true;
744 }
745
ConsumeNumber()746 absl::optional<Value> JSONParser::ConsumeNumber() {
747 const char* num_start = pos();
748 const size_t start_index = index_;
749 size_t end_index = start_index;
750
751 if (PeekChar() == '-')
752 ConsumeChar();
753
754 if (!ReadInt(false)) {
755 ReportError(JSON_SYNTAX_ERROR, 0);
756 return absl::nullopt;
757 }
758 end_index = index_;
759
760 // The optional fraction part.
761 if (PeekChar() == '.') {
762 ConsumeChar();
763 if (!ReadInt(true)) {
764 ReportError(JSON_SYNTAX_ERROR, 0);
765 return absl::nullopt;
766 }
767 end_index = index_;
768 }
769
770 // Optional exponent part.
771 absl::optional<char> c = PeekChar();
772 if (c == 'e' || c == 'E') {
773 ConsumeChar();
774 if (PeekChar() == '-' || PeekChar() == '+') {
775 ConsumeChar();
776 }
777 if (!ReadInt(true)) {
778 ReportError(JSON_SYNTAX_ERROR, 0);
779 return absl::nullopt;
780 }
781 end_index = index_;
782 }
783
784 // ReadInt is greedy because numbers have no easily detectable sentinel,
785 // so save off where the parser should be on exit (see Consume invariant at
786 // the top of the header), then make sure the next token is one which is
787 // valid.
788 size_t exit_index = index_;
789
790 switch (GetNextToken()) {
791 case T_OBJECT_END:
792 case T_ARRAY_END:
793 case T_LIST_SEPARATOR:
794 case T_END_OF_INPUT:
795 break;
796 default:
797 ReportError(JSON_SYNTAX_ERROR, 0);
798 return absl::nullopt;
799 }
800
801 index_ = exit_index;
802
803 StringPiece num_string(num_start, end_index - start_index);
804
805 int num_int;
806 if (StringToInt(num_string, &num_int)) {
807 // StringToInt will treat `-0` as zero, losing the significance of the
808 // negation.
809 if (num_int == 0 && num_string.starts_with('-')) {
810 if (base::FeatureList::IsEnabled(features::kJsonNegativeZero)) {
811 return Value(-0.0);
812 }
813 }
814 return Value(num_int);
815 }
816
817 double num_double;
818 if (StringToDouble(num_string, &num_double) && std::isfinite(num_double)) {
819 return Value(num_double);
820 }
821
822 ReportError(JSON_UNREPRESENTABLE_NUMBER, 0);
823 return absl::nullopt;
824 }
825
ReadInt(bool allow_leading_zeros)826 bool JSONParser::ReadInt(bool allow_leading_zeros) {
827 size_t len = 0;
828 char first = 0;
829
830 while (absl::optional<char> c = PeekChar()) {
831 if (!IsAsciiDigit(c))
832 break;
833
834 if (len == 0)
835 first = *c;
836
837 ++len;
838 ConsumeChar();
839 }
840
841 if (len == 0)
842 return false;
843
844 if (!allow_leading_zeros && len > 1 && first == '0')
845 return false;
846
847 return true;
848 }
849
ConsumeLiteral()850 absl::optional<Value> JSONParser::ConsumeLiteral() {
851 if (ConsumeIfMatch("true"))
852 return Value(true);
853 if (ConsumeIfMatch("false"))
854 return Value(false);
855 if (ConsumeIfMatch("null"))
856 return Value(Value::Type::NONE);
857 ReportError(JSON_SYNTAX_ERROR, 0);
858 return absl::nullopt;
859 }
860
ConsumeIfMatch(StringPiece match)861 bool JSONParser::ConsumeIfMatch(StringPiece match) {
862 if (match == PeekChars(match.size())) {
863 ConsumeChars(match.size());
864 return true;
865 }
866 return false;
867 }
868
ReportError(JsonParseError code,int column_adjust)869 void JSONParser::ReportError(JsonParseError code, int column_adjust) {
870 error_code_ = code;
871 error_line_ = line_number_;
872 error_column_ = static_cast<int>(index_ - index_last_line_) + column_adjust;
873
874 // For a final blank line ('\n' and then EOF), a negative column_adjust may
875 // put us below 1, which doesn't really make sense for 1-based columns.
876 if (error_column_ < 1) {
877 error_column_ = 1;
878 }
879 }
880
881 // static
FormatErrorMessage(int line,int column,const std::string & description)882 std::string JSONParser::FormatErrorMessage(int line, int column,
883 const std::string& description) {
884 if (line || column) {
885 return StringPrintf("Line: %i, column: %i, %s",
886 line, column, description.c_str());
887 }
888 return description;
889 }
890
891 } // namespace internal
892 } // namespace base
893