• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "base/json/json_parser.h"
6 
7 #include <cmath>
8 #include <string_view>
9 #include <utility>
10 #include <vector>
11 
12 #include "base/logging.h"
13 #include "base/numerics/safe_conversions.h"
14 #include "base/strings/string_number_conversions.h"
15 #include "base/strings/string_util.h"
16 #include "base/strings/stringprintf.h"
17 #include "base/strings/utf_string_conversion_utils.h"
18 #include "base/strings/utf_string_conversions.h"
19 #include "base/third_party/icu/icu_utf.h"
20 #include "base/values.h"
21 
22 namespace base {
23 namespace internal {
24 
25 namespace {
26 
// First code point that lies outside the 7-bit ASCII range. StringBuilder
// treats anything below this value as a single byte it can keep viewing
// in place in the input.
constexpr int32_t kExtendedASCIIStart = 0x80;
28 
29 // Simple class that checks for maximum recursion/"stack overflow."
30 class StackMarker {
31  public:
StackMarker(int max_depth,int * depth)32   StackMarker(int max_depth, int* depth)
33       : max_depth_(max_depth), depth_(depth) {
34     ++(*depth_);
35     DCHECK_LE(*depth_, max_depth_);
36   }
~StackMarker()37   ~StackMarker() { --(*depth_); }
38 
IsTooDeep() const39   bool IsTooDeep() const { return *depth_ >= max_depth_; }
40 
41  private:
42   const int max_depth_;
43   int* const depth_;
44 
45   StackMarker(const StackMarker&) = delete;
46   StackMarker& operator=(const StackMarker&) = delete;
47 };
48 
// Code point U+FFFD. The parser appends it in place of invalid characters
// when JSON_REPLACE_INVALID_CHARACTERS is set.
constexpr uint32_t kUnicodeReplacementPoint = 0xFFFDu;
50 
51 }  // namespace
52 
// The three-byte UTF-8 encoding of U+FFFD.
const char kUnicodeReplacementString[] = "\xEF\xBF\xBD";
55 
JSONParser(int options,int max_depth)56 JSONParser::JSONParser(int options, int max_depth)
57     : options_(options),
58       max_depth_(max_depth),
59       index_(0),
60       stack_depth_(0),
61       line_number_(0),
62       index_last_line_(0),
63       error_code_(JSONReader::JSON_NO_ERROR),
64       error_line_(0),
65       error_column_(0) {
66   CHECK_LE(max_depth, JSONReader::kStackMaxDepth);
67 }
68 
69 JSONParser::~JSONParser() = default;
70 
Parse(std::string_view input)71 std::optional<Value> JSONParser::Parse(std::string_view input) {
72   input_ = input;
73   index_ = 0;
74   line_number_ = 1;
75   index_last_line_ = 0;
76 
77   error_code_ = JSONReader::JSON_NO_ERROR;
78   error_line_ = 0;
79   error_column_ = 0;
80 
81   // ICU and ReadUnicodeCharacter() use int32_t for lengths, so ensure
82   // that the index_ will not overflow when parsing.
83   if (!base::IsValueInRangeForNumericType<int32_t>(input.length())) {
84     ReportError(JSONReader::JSON_TOO_LARGE, 0);
85     return std::nullopt;
86   }
87 
88   // When the input JSON string starts with a UTF-8 Byte-Order-Mark,
89   // advance the start position to avoid the ParseNextToken function mis-
90   // treating a Unicode BOM as an invalid character and returning NULL.
91   ConsumeIfMatch("\xEF\xBB\xBF");
92 
93   // Parse the first and any nested tokens.
94   std::optional<Value> root(ParseNextToken());
95   if (!root)
96     return std::nullopt;
97 
98   // Make sure the input stream is at an end.
99   if (GetNextToken() != T_END_OF_INPUT) {
100     ReportError(JSONReader::JSON_UNEXPECTED_DATA_AFTER_ROOT, 1);
101     return std::nullopt;
102   }
103 
104   return root;
105 }
106 
error_code() const107 JSONReader::JsonParseError JSONParser::error_code() const {
108   return error_code_;
109 }
110 
GetErrorMessage() const111 std::string JSONParser::GetErrorMessage() const {
112   return FormatErrorMessage(error_line_, error_column_,
113                             JSONReader::ErrorCodeToString(error_code_));
114 }
115 
error_line() const116 int JSONParser::error_line() const {
117   return error_line_;
118 }
119 
error_column() const120 int JSONParser::error_column() const {
121   return error_column_;
122 }
123 
124 // StringBuilder ///////////////////////////////////////////////////////////////
125 
StringBuilder()126 JSONParser::StringBuilder::StringBuilder() : StringBuilder(nullptr) {}
127 
StringBuilder(const char * pos)128 JSONParser::StringBuilder::StringBuilder(const char* pos)
129     : pos_(pos), length_(0) {}
130 
131 JSONParser::StringBuilder::~StringBuilder() = default;
132 
133 JSONParser::StringBuilder& JSONParser::StringBuilder::operator=(
134     StringBuilder&& other) = default;
135 
Append(uint32_t point)136 void JSONParser::StringBuilder::Append(uint32_t point) {
137   DCHECK(IsValidCharacter(point));
138 
139   if (point < kExtendedASCIIStart && !string_) {
140     DCHECK_EQ(static_cast<char>(point), pos_[length_]);
141     ++length_;
142   } else {
143     Convert();
144     if (UNLIKELY(point == kUnicodeReplacementPoint)) {
145       string_->append(kUnicodeReplacementString);
146     } else {
147       WriteUnicodeCharacter(point, &*string_);
148     }
149   }
150 }
151 
Convert()152 void JSONParser::StringBuilder::Convert() {
153   if (string_)
154     return;
155   string_.emplace(pos_, length_);
156 }
157 
DestructiveAsString()158 std::string JSONParser::StringBuilder::DestructiveAsString() {
159   if (string_)
160     return std::move(*string_);
161   return std::string(pos_, length_);
162 }
163 
164 // JSONParser private //////////////////////////////////////////////////////////
165 
PeekChars(int count)166 std::optional<std::string_view> JSONParser::PeekChars(int count) {
167   if (static_cast<size_t>(index_) + count > input_.length())
168     return std::nullopt;
169   // Using std::string_view::substr() is significantly slower (according to
170   // base_perftests) than constructing a substring manually.
171   return std::string_view(input_.data() + index_, count);
172 }
173 
PeekChar()174 std::optional<char> JSONParser::PeekChar() {
175   std::optional<std::string_view> chars = PeekChars(1);
176   if (chars)
177     return (*chars)[0];
178   return std::nullopt;
179 }
180 
ConsumeChars(int count)181 std::optional<std::string_view> JSONParser::ConsumeChars(int count) {
182   std::optional<std::string_view> chars = PeekChars(count);
183   if (chars)
184     index_ += count;
185   return chars;
186 }
187 
ConsumeChar()188 std::optional<char> JSONParser::ConsumeChar() {
189   std::optional<std::string_view> chars = ConsumeChars(1);
190   if (chars)
191     return (*chars)[0];
192   return std::nullopt;
193 }
194 
pos()195 const char* JSONParser::pos() {
196   CHECK_LE(static_cast<size_t>(index_), input_.length());
197   return input_.data() + index_;
198 }
199 
GetNextToken()200 JSONParser::Token JSONParser::GetNextToken() {
201   EatWhitespaceAndComments();
202 
203   std::optional<char> c = PeekChar();
204   if (!c)
205     return T_END_OF_INPUT;
206 
207   switch (*c) {
208     case '{':
209       return T_OBJECT_BEGIN;
210     case '}':
211       return T_OBJECT_END;
212     case '[':
213       return T_ARRAY_BEGIN;
214     case ']':
215       return T_ARRAY_END;
216     case '"':
217       return T_STRING;
218     case '0':
219     case '1':
220     case '2':
221     case '3':
222     case '4':
223     case '5':
224     case '6':
225     case '7':
226     case '8':
227     case '9':
228     case '-':
229       return T_NUMBER;
230     case 't':
231       return T_BOOL_TRUE;
232     case 'f':
233       return T_BOOL_FALSE;
234     case 'n':
235       return T_NULL;
236     case ',':
237       return T_LIST_SEPARATOR;
238     case ':':
239       return T_OBJECT_PAIR_SEPARATOR;
240     default:
241       return T_INVALID_TOKEN;
242   }
243 }
244 
EatWhitespaceAndComments()245 void JSONParser::EatWhitespaceAndComments() {
246   while (std::optional<char> c = PeekChar()) {
247     switch (*c) {
248       case '\r':
249       case '\n':
250         index_last_line_ = index_;
251         // Don't increment line_number_ twice for "\r\n".
252         if (!(c == '\n' && index_ > 0 && input_[index_ - 1] == '\r')) {
253           ++line_number_;
254         }
255         FALLTHROUGH;
256       case ' ':
257       case '\t':
258         ConsumeChar();
259         break;
260       case '/':
261         if (!EatComment())
262           return;
263         break;
264       default:
265         return;
266     }
267   }
268 }
269 
EatComment()270 bool JSONParser::EatComment() {
271   std::optional<std::string_view> comment_start = ConsumeChars(2);
272   if (!comment_start)
273     return false;
274 
275   if (comment_start == "//") {
276     // Single line comment, read to newline.
277     while (std::optional<char> c = PeekChar()) {
278       if (c == '\n' || c == '\r')
279         return true;
280       ConsumeChar();
281     }
282   } else if (comment_start == "/*") {
283     char previous_char = '\0';
284     // Block comment, read until end marker.
285     while (std::optional<char> c = PeekChar()) {
286       if (previous_char == '*' && c == '/') {
287         // EatWhitespaceAndComments will inspect pos(), which will still be on
288         // the last / of the comment, so advance once more (which may also be
289         // end of input).
290         ConsumeChar();
291         return true;
292       }
293       previous_char = *ConsumeChar();
294     }
295 
296     // If the comment is unterminated, GetNextToken will report T_END_OF_INPUT.
297   }
298 
299   return false;
300 }
301 
ParseNextToken()302 std::optional<Value> JSONParser::ParseNextToken() {
303   return ParseToken(GetNextToken());
304 }
305 
ParseToken(Token token)306 std::optional<Value> JSONParser::ParseToken(Token token) {
307   switch (token) {
308     case T_OBJECT_BEGIN:
309       return ConsumeDictionary();
310     case T_ARRAY_BEGIN:
311       return ConsumeList();
312     case T_STRING:
313       return ConsumeString();
314     case T_NUMBER:
315       return ConsumeNumber();
316     case T_BOOL_TRUE:
317     case T_BOOL_FALSE:
318     case T_NULL:
319       return ConsumeLiteral();
320     default:
321       ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
322       return std::nullopt;
323   }
324 }
325 
ConsumeDictionary()326 std::optional<Value> JSONParser::ConsumeDictionary() {
327   if (ConsumeChar() != '{') {
328     ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
329     return std::nullopt;
330   }
331 
332   StackMarker depth_check(max_depth_, &stack_depth_);
333   if (depth_check.IsTooDeep()) {
334     ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 0);
335     return std::nullopt;
336   }
337 
338   std::vector<Value::DictStorage::value_type> dict_storage;
339 
340   Token token = GetNextToken();
341   while (token != T_OBJECT_END) {
342     if (token != T_STRING) {
343       ReportError(JSONReader::JSON_UNQUOTED_DICTIONARY_KEY, 1);
344       return std::nullopt;
345     }
346 
347     // First consume the key.
348     StringBuilder key;
349     if (!ConsumeStringRaw(&key)) {
350       return std::nullopt;
351     }
352 
353     // Read the separator.
354     token = GetNextToken();
355     if (token != T_OBJECT_PAIR_SEPARATOR) {
356       ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
357       return std::nullopt;
358     }
359 
360     // The next token is the value. Ownership transfers to |dict|.
361     ConsumeChar();
362     std::optional<Value> value = ParseNextToken();
363     if (!value) {
364       // ReportError from deeper level.
365       return std::nullopt;
366     }
367 
368     dict_storage.emplace_back(key.DestructiveAsString(),
369                               std::make_unique<Value>(std::move(*value)));
370 
371     token = GetNextToken();
372     if (token == T_LIST_SEPARATOR) {
373       ConsumeChar();
374       token = GetNextToken();
375       if (token == T_OBJECT_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
376         ReportError(JSONReader::JSON_TRAILING_COMMA, 1);
377         return std::nullopt;
378       }
379     } else if (token != T_OBJECT_END) {
380       ReportError(JSONReader::JSON_SYNTAX_ERROR, 0);
381       return std::nullopt;
382     }
383   }
384 
385   ConsumeChar();  // Closing '}'.
386 
387   return Value(Value::DictStorage(std::move(dict_storage), KEEP_LAST_OF_DUPES));
388 }
389 
ConsumeList()390 std::optional<Value> JSONParser::ConsumeList() {
391   if (ConsumeChar() != '[') {
392     ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
393     return std::nullopt;
394   }
395 
396   StackMarker depth_check(max_depth_, &stack_depth_);
397   if (depth_check.IsTooDeep()) {
398     ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 0);
399     return std::nullopt;
400   }
401 
402   Value::ListStorage list_storage;
403 
404   Token token = GetNextToken();
405   while (token != T_ARRAY_END) {
406     std::optional<Value> item = ParseToken(token);
407     if (!item) {
408       // ReportError from deeper level.
409       return std::nullopt;
410     }
411 
412     list_storage.push_back(std::move(*item));
413 
414     token = GetNextToken();
415     if (token == T_LIST_SEPARATOR) {
416       ConsumeChar();
417       token = GetNextToken();
418       if (token == T_ARRAY_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
419         ReportError(JSONReader::JSON_TRAILING_COMMA, 1);
420         return std::nullopt;
421       }
422     } else if (token != T_ARRAY_END) {
423       ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
424       return std::nullopt;
425     }
426   }
427 
428   ConsumeChar();  // Closing ']'.
429 
430   return Value(std::move(list_storage));
431 }
432 
ConsumeString()433 std::optional<Value> JSONParser::ConsumeString() {
434   StringBuilder string;
435   if (!ConsumeStringRaw(&string))
436     return std::nullopt;
437 
438   return Value(string.DestructiveAsString());
439 }
440 
ConsumeStringRaw(StringBuilder * out)441 bool JSONParser::ConsumeStringRaw(StringBuilder* out) {
442   if (ConsumeChar() != '"') {
443     ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
444     return false;
445   }
446 
447   // StringBuilder will internally build a std::string_view unless a UTF-16
448   // conversion occurs, at which point it will perform a copy into a
449   // std::string.
450   StringBuilder string(pos());
451 
452   while (PeekChar()) {
453     uint32_t next_char = 0;
454     if (!ReadUnicodeCharacter(input_.data(),
455                               static_cast<int32_t>(input_.length()), &index_,
456                               &next_char) ||
457         !IsValidCharacter(next_char)) {
458       if ((options_ & JSON_REPLACE_INVALID_CHARACTERS) == 0) {
459         ReportError(JSONReader::JSON_UNSUPPORTED_ENCODING, 1);
460         return false;
461       }
462       ConsumeChar();
463       string.Append(kUnicodeReplacementPoint);
464       continue;
465     }
466 
467     if (next_char == '"') {
468       ConsumeChar();
469       *out = std::move(string);
470       return true;
471     } else if (next_char != '\\') {
472       // If this character is not an escape sequence...
473       ConsumeChar();
474       string.Append(next_char);
475     } else {
476       // And if it is an escape sequence, the input string will be adjusted
477       // (either by combining the two characters of an encoded escape sequence,
478       // or with a UTF conversion), so using std::string_view isn't possible --
479       // force a conversion.
480       string.Convert();
481 
482       // Read past the escape '\' and ensure there's a character following.
483       std::optional<std::string_view> escape_sequence = ConsumeChars(2);
484       if (!escape_sequence) {
485         ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
486         return false;
487       }
488 
489       switch ((*escape_sequence)[1]) {
490         // Allowed esape sequences:
491         case 'x': {  // UTF-8 sequence.
492           // UTF-8 \x escape sequences are not allowed in the spec, but they
493           // are supported here for backwards-compatiblity with the old parser.
494           escape_sequence = ConsumeChars(2);
495           if (!escape_sequence) {
496             ReportError(JSONReader::JSON_INVALID_ESCAPE, -2);
497             return false;
498           }
499 
500           int hex_digit = 0;
501           if (!HexStringToInt(*escape_sequence, &hex_digit) ||
502               !IsValidCharacter(hex_digit)) {
503             ReportError(JSONReader::JSON_INVALID_ESCAPE, -2);
504             return false;
505           }
506 
507           string.Append(hex_digit);
508           break;
509         }
510         case 'u': {  // UTF-16 sequence.
511           // UTF units are of the form \uXXXX.
512           uint32_t code_point;
513           if (!DecodeUTF16(&code_point)) {
514             ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
515             return false;
516           }
517           string.Append(code_point);
518           break;
519         }
520         case '"':
521           string.Append('"');
522           break;
523         case '\\':
524           string.Append('\\');
525           break;
526         case '/':
527           string.Append('/');
528           break;
529         case 'b':
530           string.Append('\b');
531           break;
532         case 'f':
533           string.Append('\f');
534           break;
535         case 'n':
536           string.Append('\n');
537           break;
538         case 'r':
539           string.Append('\r');
540           break;
541         case 't':
542           string.Append('\t');
543           break;
544         case 'v':  // Not listed as valid escape sequence in the RFC.
545           string.Append('\v');
546           break;
547         // All other escape squences are illegal.
548         default:
549           ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
550           return false;
551       }
552     }
553   }
554 
555   ReportError(JSONReader::JSON_SYNTAX_ERROR, 0);
556   return false;
557 }
558 
559 // Entry is at the first X in \uXXXX.
DecodeUTF16(uint32_t * out_code_point)560 bool JSONParser::DecodeUTF16(uint32_t* out_code_point) {
561   std::optional<std::string_view> escape_sequence = ConsumeChars(4);
562   if (!escape_sequence)
563     return false;
564 
565   // Consume the UTF-16 code unit, which may be a high surrogate.
566   int code_unit16_high = 0;
567   if (!HexStringToInt(*escape_sequence, &code_unit16_high))
568     return false;
569 
570   // If this is a high surrogate, consume the next code unit to get the
571   // low surrogate.
572   if (CBU16_IS_SURROGATE(code_unit16_high)) {
573     // Make sure this is the high surrogate. If not, it's an encoding
574     // error.
575     if (!CBU16_IS_SURROGATE_LEAD(code_unit16_high))
576       return false;
577 
578     // Make sure that the token has more characters to consume the
579     // lower surrogate.
580     if (!ConsumeIfMatch("\\u"))
581       return false;
582 
583     escape_sequence = ConsumeChars(4);
584     if (!escape_sequence)
585       return false;
586 
587     int code_unit16_low = 0;
588     if (!HexStringToInt(*escape_sequence, &code_unit16_low))
589       return false;
590 
591     if (!CBU16_IS_TRAIL(code_unit16_low))
592       return false;
593 
594     uint32_t code_point =
595         CBU16_GET_SUPPLEMENTARY(code_unit16_high, code_unit16_low);
596     if (!IsValidCharacter(code_point))
597       return false;
598 
599     *out_code_point = code_point;
600   } else {
601     // Not a surrogate.
602     DCHECK(CBU16_IS_SINGLE(code_unit16_high));
603     if (!IsValidCharacter(code_unit16_high)) {
604       if ((options_ & JSON_REPLACE_INVALID_CHARACTERS) == 0) {
605         return false;
606       }
607       *out_code_point = kUnicodeReplacementPoint;
608       return true;
609     }
610 
611     *out_code_point = code_unit16_high;
612   }
613 
614   return true;
615 }
616 
ConsumeNumber()617 std::optional<Value> JSONParser::ConsumeNumber() {
618   const char* num_start = pos();
619   const int start_index = index_;
620   int end_index = start_index;
621 
622   if (PeekChar() == '-')
623     ConsumeChar();
624 
625   if (!ReadInt(false)) {
626     ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
627     return std::nullopt;
628   }
629   end_index = index_;
630 
631   // The optional fraction part.
632   if (PeekChar() == '.') {
633     ConsumeChar();
634     if (!ReadInt(true)) {
635       ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
636       return std::nullopt;
637     }
638     end_index = index_;
639   }
640 
641   // Optional exponent part.
642   std::optional<char> c = PeekChar();
643   if (c == 'e' || c == 'E') {
644     ConsumeChar();
645     if (PeekChar() == '-' || PeekChar() == '+') {
646       ConsumeChar();
647     }
648     if (!ReadInt(true)) {
649       ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
650       return std::nullopt;
651     }
652     end_index = index_;
653   }
654 
655   // ReadInt is greedy because numbers have no easily detectable sentinel,
656   // so save off where the parser should be on exit (see Consume invariant at
657   // the top of the header), then make sure the next token is one which is
658   // valid.
659   int exit_index = index_;
660 
661   switch (GetNextToken()) {
662     case T_OBJECT_END:
663     case T_ARRAY_END:
664     case T_LIST_SEPARATOR:
665     case T_END_OF_INPUT:
666       break;
667     default:
668       ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
669       return std::nullopt;
670   }
671 
672   index_ = exit_index;
673 
674   std::string_view num_string(num_start, end_index - start_index);
675 
676   int num_int;
677   if (StringToInt(num_string, &num_int))
678     return Value(num_int);
679 
680   return std::nullopt;
681 }
682 
ReadInt(bool allow_leading_zeros)683 bool JSONParser::ReadInt(bool allow_leading_zeros) {
684   size_t len = 0;
685   char first = 0;
686 
687   while (std::optional<char> c = PeekChar()) {
688     if (!IsAsciiDigit(c))
689       break;
690 
691     if (len == 0)
692       first = *c;
693 
694     ++len;
695     ConsumeChar();
696   }
697 
698   if (len == 0)
699     return false;
700 
701   if (!allow_leading_zeros && len > 1 && first == '0')
702     return false;
703 
704   return true;
705 }
706 
ConsumeLiteral()707 std::optional<Value> JSONParser::ConsumeLiteral() {
708   if (ConsumeIfMatch("true")) {
709     return Value(true);
710   } else if (ConsumeIfMatch("false")) {
711     return Value(false);
712   } else if (ConsumeIfMatch("null")) {
713     return Value(Value::Type::NONE);
714   } else {
715     ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
716     return std::nullopt;
717   }
718 }
719 
ConsumeIfMatch(std::string_view match)720 bool JSONParser::ConsumeIfMatch(std::string_view match) {
721   if (match == PeekChars(match.size())) {
722     ConsumeChars(match.size());
723     return true;
724   }
725   return false;
726 }
727 
ReportError(JSONReader::JsonParseError code,int column_adjust)728 void JSONParser::ReportError(JSONReader::JsonParseError code,
729                              int column_adjust) {
730   error_code_ = code;
731   error_line_ = line_number_;
732   error_column_ = index_ - index_last_line_ + column_adjust;
733 }
734 
735 // static
FormatErrorMessage(int line,int column,const std::string & description)736 std::string JSONParser::FormatErrorMessage(int line,
737                                            int column,
738                                            const std::string& description) {
739   if (line || column) {
740     return StringPrintf("Line: %i, column: %i, %s", line, column,
741                         description.c_str());
742   }
743   return description;
744 }
745 
746 }  // namespace internal
747 }  // namespace base
748