• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "base/json/json_reader.h"
6 
7 #include "base/float_util.h"
8 #include "base/logging.h"
9 #include "base/memory/scoped_ptr.h"
10 #include "base/string_number_conversions.h"
11 #include "base/string_util.h"
12 #include "base/utf_string_conversions.h"
13 #include "base/values.h"
14 
15 namespace base {
16 
17 static const JSONReader::Token kInvalidToken(JSONReader::Token::INVALID_TOKEN,
18                                              0, 0);
19 static const int kStackLimit = 100;
20 
21 namespace {
22 
23 // A helper method for ParseNumberToken.  It reads an int from the end of
24 // token.  The method returns false if there is no valid integer at the end of
25 // the token.
ReadInt(JSONReader::Token & token,bool can_have_leading_zeros)26 bool ReadInt(JSONReader::Token& token, bool can_have_leading_zeros) {
27   wchar_t first = token.NextChar();
28   int len = 0;
29 
30   // Read in more digits
31   wchar_t c = first;
32   while ('\0' != c && '0' <= c && c <= '9') {
33     ++token.length;
34     ++len;
35     c = token.NextChar();
36   }
37   // We need at least 1 digit.
38   if (len == 0)
39     return false;
40 
41   if (!can_have_leading_zeros && len > 1 && '0' == first)
42     return false;
43 
44   return true;
45 }
46 
47 // A helper method for ParseStringToken.  It reads |digits| hex digits from the
48 // token. If the sequence if digits is not valid (contains other characters),
49 // the method returns false.
ReadHexDigits(JSONReader::Token & token,int digits)50 bool ReadHexDigits(JSONReader::Token& token, int digits) {
51   for (int i = 1; i <= digits; ++i) {
52     wchar_t c = *(token.begin + token.length + i);
53     if ('\0' == c)
54       return false;
55     if (!(('0' <= c && c <= '9') || ('a' <= c && c <= 'f') ||
56           ('A' <= c && c <= 'F'))) {
57       return false;
58     }
59   }
60 
61   token.length += digits;
62   return true;
63 }
64 
65 }  // anonymous namespace
66 
67 const char* JSONReader::kBadRootElementType =
68     "Root value must be an array or object.";
69 const char* JSONReader::kInvalidEscape =
70     "Invalid escape sequence.";
71 const char* JSONReader::kSyntaxError =
72     "Syntax error.";
73 const char* JSONReader::kTrailingComma =
74     "Trailing comma not allowed.";
75 const char* JSONReader::kTooMuchNesting =
76     "Too much nesting.";
77 const char* JSONReader::kUnexpectedDataAfterRoot =
78     "Unexpected data after root element.";
79 const char* JSONReader::kUnsupportedEncoding =
80     "Unsupported encoding. JSON must be UTF-8.";
81 const char* JSONReader::kUnquotedDictionaryKey =
82     "Dictionary keys must be quoted.";
83 
JSONReader()84 JSONReader::JSONReader()
85     : start_pos_(NULL), json_pos_(NULL), stack_depth_(0),
86       allow_trailing_comma_(false),
87       error_code_(JSON_NO_ERROR), error_line_(0), error_col_(0) {}
88 
89 /* static */
Read(const std::string & json,bool allow_trailing_comma)90 Value* JSONReader::Read(const std::string& json,
91                         bool allow_trailing_comma) {
92   return ReadAndReturnError(json, allow_trailing_comma, NULL, NULL);
93 }
94 
95 /* static */
ReadAndReturnError(const std::string & json,bool allow_trailing_comma,int * error_code_out,std::string * error_msg_out)96 Value* JSONReader::ReadAndReturnError(const std::string& json,
97                                       bool allow_trailing_comma,
98                                       int* error_code_out,
99                                       std::string* error_msg_out) {
100   JSONReader reader = JSONReader();
101   Value* root = reader.JsonToValue(json, true, allow_trailing_comma);
102   if (root)
103     return root;
104 
105   if (error_code_out)
106     *error_code_out = reader.error_code();
107   if (error_msg_out)
108     *error_msg_out = reader.GetErrorMessage();
109 
110   return NULL;
111 }
112 
113 /* static */
ErrorCodeToString(JsonParseError error_code)114 std::string JSONReader::ErrorCodeToString(JsonParseError error_code) {
115   switch (error_code) {
116     case JSON_NO_ERROR:
117       return std::string();
118     case JSON_BAD_ROOT_ELEMENT_TYPE:
119       return kBadRootElementType;
120     case JSON_INVALID_ESCAPE:
121       return kInvalidEscape;
122     case JSON_SYNTAX_ERROR:
123       return kSyntaxError;
124     case JSON_TRAILING_COMMA:
125       return kTrailingComma;
126     case JSON_TOO_MUCH_NESTING:
127       return kTooMuchNesting;
128     case JSON_UNEXPECTED_DATA_AFTER_ROOT:
129       return kUnexpectedDataAfterRoot;
130     case JSON_UNSUPPORTED_ENCODING:
131       return kUnsupportedEncoding;
132     case JSON_UNQUOTED_DICTIONARY_KEY:
133       return kUnquotedDictionaryKey;
134     default:
135       NOTREACHED();
136       return std::string();
137   }
138 }
139 
GetErrorMessage() const140 std::string JSONReader::GetErrorMessage() const {
141   return FormatErrorMessage(error_line_, error_col_,
142                             ErrorCodeToString(error_code_));
143 }
144 
JsonToValue(const std::string & json,bool check_root,bool allow_trailing_comma)145 Value* JSONReader::JsonToValue(const std::string& json, bool check_root,
146                                bool allow_trailing_comma) {
147   // The input must be in UTF-8.
148   if (!IsStringUTF8(json.c_str())) {
149     error_code_ = JSON_UNSUPPORTED_ENCODING;
150     return NULL;
151   }
152 
153   // The conversion from UTF8 to wstring removes null bytes for us
154   // (a good thing).
155   std::wstring json_wide(UTF8ToWide(json));
156   start_pos_ = json_wide.c_str();
157 
158   // When the input JSON string starts with a UTF-8 Byte-Order-Mark
159   // (0xEF, 0xBB, 0xBF), the UTF8ToWide() function converts it to a Unicode
160   // BOM (U+FEFF). To avoid the JSONReader::BuildValue() function from
161   // mis-treating a Unicode BOM as an invalid character and returning NULL,
162   // skip a converted Unicode BOM if it exists.
163   if (!json_wide.empty() && start_pos_[0] == 0xFEFF) {
164     ++start_pos_;
165   }
166 
167   json_pos_ = start_pos_;
168   allow_trailing_comma_ = allow_trailing_comma;
169   stack_depth_ = 0;
170   error_code_ = JSON_NO_ERROR;
171 
172   scoped_ptr<Value> root(BuildValue(check_root));
173   if (root.get()) {
174     if (ParseToken().type == Token::END_OF_INPUT) {
175       return root.release();
176     } else {
177       SetErrorCode(JSON_UNEXPECTED_DATA_AFTER_ROOT, json_pos_);
178     }
179   }
180 
181   // Default to calling errors "syntax errors".
182   if (error_code_ == 0)
183     SetErrorCode(JSON_SYNTAX_ERROR, json_pos_);
184 
185   return NULL;
186 }
187 
188 /* static */
FormatErrorMessage(int line,int column,const std::string & description)189 std::string JSONReader::FormatErrorMessage(int line, int column,
190                                            const std::string& description) {
191   if (line || column) {
192     return StringPrintf("Line: %i, column: %i, %s",
193                         line, column, description.c_str());
194   }
195   return description;
196 }
197 
BuildValue(bool is_root)198 Value* JSONReader::BuildValue(bool is_root) {
199   ++stack_depth_;
200   if (stack_depth_ > kStackLimit) {
201     SetErrorCode(JSON_TOO_MUCH_NESTING, json_pos_);
202     return NULL;
203   }
204 
205   Token token = ParseToken();
206   // The root token must be an array or an object.
207   if (is_root && token.type != Token::OBJECT_BEGIN &&
208       token.type != Token::ARRAY_BEGIN) {
209     SetErrorCode(JSON_BAD_ROOT_ELEMENT_TYPE, json_pos_);
210     return NULL;
211   }
212 
213   scoped_ptr<Value> node;
214 
215   switch (token.type) {
216     case Token::END_OF_INPUT:
217     case Token::INVALID_TOKEN:
218       return NULL;
219 
220     case Token::NULL_TOKEN:
221       node.reset(Value::CreateNullValue());
222       break;
223 
224     case Token::BOOL_TRUE:
225       node.reset(Value::CreateBooleanValue(true));
226       break;
227 
228     case Token::BOOL_FALSE:
229       node.reset(Value::CreateBooleanValue(false));
230       break;
231 
232     case Token::NUMBER:
233       node.reset(DecodeNumber(token));
234       if (!node.get())
235         return NULL;
236       break;
237 
238     case Token::STRING:
239       node.reset(DecodeString(token));
240       if (!node.get())
241         return NULL;
242       break;
243 
244     case Token::ARRAY_BEGIN:
245       {
246         json_pos_ += token.length;
247         token = ParseToken();
248 
249         node.reset(new ListValue());
250         while (token.type != Token::ARRAY_END) {
251           Value* array_node = BuildValue(false);
252           if (!array_node)
253             return NULL;
254           static_cast<ListValue*>(node.get())->Append(array_node);
255 
256           // After a list value, we expect a comma or the end of the list.
257           token = ParseToken();
258           if (token.type == Token::LIST_SEPARATOR) {
259             json_pos_ += token.length;
260             token = ParseToken();
261             // Trailing commas are invalid according to the JSON RFC, but some
262             // consumers need the parsing leniency, so handle accordingly.
263             if (token.type == Token::ARRAY_END) {
264               if (!allow_trailing_comma_) {
265                 SetErrorCode(JSON_TRAILING_COMMA, json_pos_);
266                 return NULL;
267               }
268               // Trailing comma OK, stop parsing the Array.
269               break;
270             }
271           } else if (token.type != Token::ARRAY_END) {
272             // Unexpected value after list value.  Bail out.
273             return NULL;
274           }
275         }
276         if (token.type != Token::ARRAY_END) {
277           return NULL;
278         }
279         break;
280       }
281 
282     case Token::OBJECT_BEGIN:
283       {
284         json_pos_ += token.length;
285         token = ParseToken();
286 
287         node.reset(new DictionaryValue);
288         while (token.type != Token::OBJECT_END) {
289           if (token.type != Token::STRING) {
290             SetErrorCode(JSON_UNQUOTED_DICTIONARY_KEY, json_pos_);
291             return NULL;
292           }
293           scoped_ptr<Value> dict_key_value(DecodeString(token));
294           if (!dict_key_value.get())
295             return NULL;
296 
297           // Convert the key into a wstring.
298           std::string dict_key;
299           bool success = dict_key_value->GetAsString(&dict_key);
300           DCHECK(success);
301 
302           json_pos_ += token.length;
303           token = ParseToken();
304           if (token.type != Token::OBJECT_PAIR_SEPARATOR)
305             return NULL;
306 
307           json_pos_ += token.length;
308           token = ParseToken();
309           Value* dict_value = BuildValue(false);
310           if (!dict_value)
311             return NULL;
312           static_cast<DictionaryValue*>(node.get())->SetWithoutPathExpansion(
313               dict_key, dict_value);
314 
315           // After a key/value pair, we expect a comma or the end of the
316           // object.
317           token = ParseToken();
318           if (token.type == Token::LIST_SEPARATOR) {
319             json_pos_ += token.length;
320             token = ParseToken();
321             // Trailing commas are invalid according to the JSON RFC, but some
322             // consumers need the parsing leniency, so handle accordingly.
323             if (token.type == Token::OBJECT_END) {
324               if (!allow_trailing_comma_) {
325                 SetErrorCode(JSON_TRAILING_COMMA, json_pos_);
326                 return NULL;
327               }
328               // Trailing comma OK, stop parsing the Object.
329               break;
330             }
331           } else if (token.type != Token::OBJECT_END) {
332             // Unexpected value after last object value.  Bail out.
333             return NULL;
334           }
335         }
336         if (token.type != Token::OBJECT_END)
337           return NULL;
338 
339         break;
340       }
341 
342     default:
343       // We got a token that's not a value.
344       return NULL;
345   }
346   json_pos_ += token.length;
347 
348   --stack_depth_;
349   return node.release();
350 }
351 
ParseNumberToken()352 JSONReader::Token JSONReader::ParseNumberToken() {
353   // We just grab the number here.  We validate the size in DecodeNumber.
354   // According   to RFC4627, a valid number is: [minus] int [frac] [exp]
355   Token token(Token::NUMBER, json_pos_, 0);
356   wchar_t c = *json_pos_;
357   if ('-' == c) {
358     ++token.length;
359     c = token.NextChar();
360   }
361 
362   if (!ReadInt(token, false))
363     return kInvalidToken;
364 
365   // Optional fraction part
366   c = token.NextChar();
367   if ('.' == c) {
368     ++token.length;
369     if (!ReadInt(token, true))
370       return kInvalidToken;
371     c = token.NextChar();
372   }
373 
374   // Optional exponent part
375   if ('e' == c || 'E' == c) {
376     ++token.length;
377     c = token.NextChar();
378     if ('-' == c || '+' == c) {
379       ++token.length;
380       c = token.NextChar();
381     }
382     if (!ReadInt(token, true))
383       return kInvalidToken;
384   }
385 
386   return token;
387 }
388 
DecodeNumber(const Token & token)389 Value* JSONReader::DecodeNumber(const Token& token) {
390   const std::wstring num_string(token.begin, token.length);
391 
392   int num_int;
393   if (StringToInt(WideToUTF8(num_string), &num_int))
394     return Value::CreateIntegerValue(num_int);
395 
396   double num_double;
397   if (StringToDouble(WideToUTF8(num_string), &num_double) &&
398       base::IsFinite(num_double))
399     return Value::CreateDoubleValue(num_double);
400 
401   return NULL;
402 }
403 
ParseStringToken()404 JSONReader::Token JSONReader::ParseStringToken() {
405   Token token(Token::STRING, json_pos_, 1);
406   wchar_t c = token.NextChar();
407   while ('\0' != c) {
408     if ('\\' == c) {
409       ++token.length;
410       c = token.NextChar();
411       // Make sure the escaped char is valid.
412       switch (c) {
413         case 'x':
414           if (!ReadHexDigits(token, 2)) {
415             SetErrorCode(JSON_INVALID_ESCAPE, json_pos_ + token.length);
416             return kInvalidToken;
417           }
418           break;
419         case 'u':
420           if (!ReadHexDigits(token, 4)) {
421             SetErrorCode(JSON_INVALID_ESCAPE, json_pos_ + token.length);
422             return kInvalidToken;
423           }
424           break;
425         case '\\':
426         case '/':
427         case 'b':
428         case 'f':
429         case 'n':
430         case 'r':
431         case 't':
432         case 'v':
433         case '"':
434           break;
435         default:
436           SetErrorCode(JSON_INVALID_ESCAPE, json_pos_ + token.length);
437           return kInvalidToken;
438       }
439     } else if ('"' == c) {
440       ++token.length;
441       return token;
442     }
443     ++token.length;
444     c = token.NextChar();
445   }
446   return kInvalidToken;
447 }
448 
DecodeString(const Token & token)449 Value* JSONReader::DecodeString(const Token& token) {
450   std::wstring decoded_str;
451   decoded_str.reserve(token.length - 2);
452 
453   for (int i = 1; i < token.length - 1; ++i) {
454     wchar_t c = *(token.begin + i);
455     if ('\\' == c) {
456       ++i;
457       c = *(token.begin + i);
458       switch (c) {
459         case '"':
460         case '/':
461         case '\\':
462           decoded_str.push_back(c);
463           break;
464         case 'b':
465           decoded_str.push_back('\b');
466           break;
467         case 'f':
468           decoded_str.push_back('\f');
469           break;
470         case 'n':
471           decoded_str.push_back('\n');
472           break;
473         case 'r':
474           decoded_str.push_back('\r');
475           break;
476         case 't':
477           decoded_str.push_back('\t');
478           break;
479         case 'v':
480           decoded_str.push_back('\v');
481           break;
482 
483         case 'x':
484           decoded_str.push_back((HexDigitToInt(*(token.begin + i + 1)) << 4) +
485                                 HexDigitToInt(*(token.begin + i + 2)));
486           i += 2;
487           break;
488         case 'u':
489           decoded_str.push_back((HexDigitToInt(*(token.begin + i + 1)) << 12 ) +
490                                 (HexDigitToInt(*(token.begin + i + 2)) << 8) +
491                                 (HexDigitToInt(*(token.begin + i + 3)) << 4) +
492                                 HexDigitToInt(*(token.begin + i + 4)));
493           i += 4;
494           break;
495 
496         default:
497           // We should only have valid strings at this point.  If not,
498           // ParseStringToken didn't do it's job.
499           NOTREACHED();
500           return NULL;
501       }
502     } else {
503       // Not escaped
504       decoded_str.push_back(c);
505     }
506   }
507   return Value::CreateStringValue(WideToUTF16Hack(decoded_str));
508 }
509 
ParseToken()510 JSONReader::Token JSONReader::ParseToken() {
511   static const std::wstring kNullString(L"null");
512   static const std::wstring kTrueString(L"true");
513   static const std::wstring kFalseString(L"false");
514 
515   EatWhitespaceAndComments();
516 
517   Token token(Token::INVALID_TOKEN, 0, 0);
518   switch (*json_pos_) {
519     case '\0':
520       token.type = Token::END_OF_INPUT;
521       break;
522 
523     case 'n':
524       if (NextStringMatch(kNullString))
525         token = Token(Token::NULL_TOKEN, json_pos_, 4);
526       break;
527 
528     case 't':
529       if (NextStringMatch(kTrueString))
530         token = Token(Token::BOOL_TRUE, json_pos_, 4);
531       break;
532 
533     case 'f':
534       if (NextStringMatch(kFalseString))
535         token = Token(Token::BOOL_FALSE, json_pos_, 5);
536       break;
537 
538     case '[':
539       token = Token(Token::ARRAY_BEGIN, json_pos_, 1);
540       break;
541 
542     case ']':
543       token = Token(Token::ARRAY_END, json_pos_, 1);
544       break;
545 
546     case ',':
547       token = Token(Token::LIST_SEPARATOR, json_pos_, 1);
548       break;
549 
550     case '{':
551       token = Token(Token::OBJECT_BEGIN, json_pos_, 1);
552       break;
553 
554     case '}':
555       token = Token(Token::OBJECT_END, json_pos_, 1);
556       break;
557 
558     case ':':
559       token = Token(Token::OBJECT_PAIR_SEPARATOR, json_pos_, 1);
560       break;
561 
562     case '0':
563     case '1':
564     case '2':
565     case '3':
566     case '4':
567     case '5':
568     case '6':
569     case '7':
570     case '8':
571     case '9':
572     case '-':
573       token = ParseNumberToken();
574       break;
575 
576     case '"':
577       token = ParseStringToken();
578       break;
579   }
580   return token;
581 }
582 
EatWhitespaceAndComments()583 void JSONReader::EatWhitespaceAndComments() {
584   while ('\0' != *json_pos_) {
585     switch (*json_pos_) {
586       case ' ':
587       case '\n':
588       case '\r':
589       case '\t':
590         ++json_pos_;
591         break;
592       case '/':
593         // TODO(tc): This isn't in the RFC so it should be a parser flag.
594         if (!EatComment())
595           return;
596         break;
597       default:
598         // Not a whitespace char, just exit.
599         return;
600     }
601   }
602 }
603 
EatComment()604 bool JSONReader::EatComment() {
605   if ('/' != *json_pos_)
606     return false;
607 
608   wchar_t next_char = *(json_pos_ + 1);
609   if ('/' == next_char) {
610     // Line comment, read until \n or \r
611     json_pos_ += 2;
612     while ('\0' != *json_pos_) {
613       switch (*json_pos_) {
614         case '\n':
615         case '\r':
616           ++json_pos_;
617           return true;
618         default:
619           ++json_pos_;
620       }
621     }
622   } else if ('*' == next_char) {
623     // Block comment, read until */
624     json_pos_ += 2;
625     while ('\0' != *json_pos_) {
626       if ('*' == *json_pos_ && '/' == *(json_pos_ + 1)) {
627         json_pos_ += 2;
628         return true;
629       }
630       ++json_pos_;
631     }
632   } else {
633     return false;
634   }
635   return true;
636 }
637 
NextStringMatch(const std::wstring & str)638 bool JSONReader::NextStringMatch(const std::wstring& str) {
639   for (size_t i = 0; i < str.length(); ++i) {
640     if ('\0' == *json_pos_)
641       return false;
642     if (*(json_pos_ + i) != str[i])
643       return false;
644   }
645   return true;
646 }
647 
SetErrorCode(JsonParseError error,const wchar_t * error_pos)648 void JSONReader::SetErrorCode(JsonParseError error,
649                               const wchar_t* error_pos) {
650   int line_number = 1;
651   int column_number = 1;
652 
653   // Figure out the line and column the error occured at.
654   for (const wchar_t* pos = start_pos_; pos != error_pos; ++pos) {
655     if (*pos == '\0') {
656       NOTREACHED();
657       return;
658     }
659 
660     if (*pos == '\n') {
661       ++line_number;
662       column_number = 1;
663     } else {
664       ++column_number;
665     }
666   }
667 
668   error_line_ = line_number;
669   error_col_ = column_number;
670   error_code_ = error;
671 }
672 
673 }  // namespace base
674