• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2009 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "base/json/json_reader.h"
6 
7 #include "base/float_util.h"
8 #include "base/logging.h"
9 #include "base/scoped_ptr.h"
10 #include "base/string_util.h"
11 #include "base/utf_string_conversions.h"
12 #include "base/values.h"
13 
14 namespace base {
15 
16 static const JSONReader::Token kInvalidToken(JSONReader::Token::INVALID_TOKEN,
17                                              0, 0);
18 static const int kStackLimit = 100;
19 
20 namespace {
21 
HexToInt(wchar_t c)22 inline int HexToInt(wchar_t c) {
23   if ('0' <= c && c <= '9') {
24     return c - '0';
25   } else if ('A' <= c && c <= 'F') {
26     return c - 'A' + 10;
27   } else if ('a' <= c && c <= 'f') {
28     return c - 'a' + 10;
29   }
30   NOTREACHED();
31   return 0;
32 }
33 
34 // A helper method for ParseNumberToken.  It reads an int from the end of
35 // token.  The method returns false if there is no valid integer at the end of
36 // the token.
ReadInt(JSONReader::Token & token,bool can_have_leading_zeros)37 bool ReadInt(JSONReader::Token& token, bool can_have_leading_zeros) {
38   wchar_t first = token.NextChar();
39   int len = 0;
40 
41   // Read in more digits
42   wchar_t c = first;
43   while ('\0' != c && '0' <= c && c <= '9') {
44     ++token.length;
45     ++len;
46     c = token.NextChar();
47   }
48   // We need at least 1 digit.
49   if (len == 0)
50     return false;
51 
52   if (!can_have_leading_zeros && len > 1 && '0' == first)
53     return false;
54 
55   return true;
56 }
57 
58 // A helper method for ParseStringToken.  It reads |digits| hex digits from the
59 // token. If the sequence if digits is not valid (contains other characters),
60 // the method returns false.
ReadHexDigits(JSONReader::Token & token,int digits)61 bool ReadHexDigits(JSONReader::Token& token, int digits) {
62   for (int i = 1; i <= digits; ++i) {
63     wchar_t c = *(token.begin + token.length + i);
64     if ('\0' == c)
65       return false;
66     if (!(('0' <= c && c <= '9') || ('a' <= c && c <= 'f') ||
67           ('A' <= c && c <= 'F'))) {
68       return false;
69     }
70   }
71 
72   token.length += digits;
73   return true;
74 }
75 
76 }  // anonymous namespace
77 
78 const char* JSONReader::kBadRootElementType =
79     "Root value must be an array or object.";
80 const char* JSONReader::kInvalidEscape =
81     "Invalid escape sequence.";
82 const char* JSONReader::kSyntaxError =
83     "Syntax error.";
84 const char* JSONReader::kTrailingComma =
85     "Trailing comma not allowed.";
86 const char* JSONReader::kTooMuchNesting =
87     "Too much nesting.";
88 const char* JSONReader::kUnexpectedDataAfterRoot =
89     "Unexpected data after root element.";
90 const char* JSONReader::kUnsupportedEncoding =
91     "Unsupported encoding. JSON must be UTF-8.";
92 const char* JSONReader::kUnquotedDictionaryKey =
93     "Dictionary keys must be quoted.";
94 
95 /* static */
Read(const std::string & json,bool allow_trailing_comma)96 Value* JSONReader::Read(const std::string& json,
97                         bool allow_trailing_comma) {
98   return ReadAndReturnError(json, allow_trailing_comma, NULL);
99 }
100 
101 /* static */
ReadAndReturnError(const std::string & json,bool allow_trailing_comma,std::string * error_message_out)102 Value* JSONReader::ReadAndReturnError(const std::string& json,
103                                       bool allow_trailing_comma,
104                                       std::string *error_message_out) {
105   JSONReader reader = JSONReader();
106   Value* root = reader.JsonToValue(json, true, allow_trailing_comma);
107   if (root)
108     return root;
109 
110   if (error_message_out)
111     *error_message_out = reader.error_message();
112 
113   return NULL;
114 }
115 
116 /* static */
FormatErrorMessage(int line,int column,const char * description)117 std::string JSONReader::FormatErrorMessage(int line, int column,
118                                            const char* description) {
119   return StringPrintf("Line: %i, column: %i, %s",
120                       line, column, description);
121 }
122 
JSONReader()123 JSONReader::JSONReader()
124     : start_pos_(NULL), json_pos_(NULL), stack_depth_(0),
125       allow_trailing_comma_(false) {}
126 
JsonToValue(const std::string & json,bool check_root,bool allow_trailing_comma)127 Value* JSONReader::JsonToValue(const std::string& json, bool check_root,
128                                bool allow_trailing_comma) {
129   // The input must be in UTF-8.
130   if (!IsStringUTF8(json.c_str())) {
131     error_message_ = kUnsupportedEncoding;
132     return NULL;
133   }
134 
135   // The conversion from UTF8 to wstring removes null bytes for us
136   // (a good thing).
137   std::wstring json_wide(UTF8ToWide(json));
138   start_pos_ = json_wide.c_str();
139 
140   // When the input JSON string starts with a UTF-8 Byte-Order-Mark
141   // (0xEF, 0xBB, 0xBF), the UTF8ToWide() function converts it to a Unicode
142   // BOM (U+FEFF). To avoid the JSONReader::BuildValue() function from
143   // mis-treating a Unicode BOM as an invalid character and returning NULL,
144   // skip a converted Unicode BOM if it exists.
145   if (!json_wide.empty() && start_pos_[0] == 0xFEFF) {
146     ++start_pos_;
147   }
148 
149   json_pos_ = start_pos_;
150   allow_trailing_comma_ = allow_trailing_comma;
151   stack_depth_ = 0;
152   error_message_.clear();
153 
154   scoped_ptr<Value> root(BuildValue(check_root));
155   if (root.get()) {
156     if (ParseToken().type == Token::END_OF_INPUT) {
157       return root.release();
158     } else {
159       SetErrorMessage(kUnexpectedDataAfterRoot, json_pos_);
160     }
161   }
162 
163   // Default to calling errors "syntax errors".
164   if (error_message_.empty())
165     SetErrorMessage(kSyntaxError, json_pos_);
166 
167   return NULL;
168 }
169 
BuildValue(bool is_root)170 Value* JSONReader::BuildValue(bool is_root) {
171   ++stack_depth_;
172   if (stack_depth_ > kStackLimit) {
173     SetErrorMessage(kTooMuchNesting, json_pos_);
174     return NULL;
175   }
176 
177   Token token = ParseToken();
178   // The root token must be an array or an object.
179   if (is_root && token.type != Token::OBJECT_BEGIN &&
180       token.type != Token::ARRAY_BEGIN) {
181     SetErrorMessage(kBadRootElementType, json_pos_);
182     return NULL;
183   }
184 
185   scoped_ptr<Value> node;
186 
187   switch (token.type) {
188     case Token::END_OF_INPUT:
189     case Token::INVALID_TOKEN:
190       return NULL;
191 
192     case Token::NULL_TOKEN:
193       node.reset(Value::CreateNullValue());
194       break;
195 
196     case Token::BOOL_TRUE:
197       node.reset(Value::CreateBooleanValue(true));
198       break;
199 
200     case Token::BOOL_FALSE:
201       node.reset(Value::CreateBooleanValue(false));
202       break;
203 
204     case Token::NUMBER:
205       node.reset(DecodeNumber(token));
206       if (!node.get())
207         return NULL;
208       break;
209 
210     case Token::STRING:
211       node.reset(DecodeString(token));
212       if (!node.get())
213         return NULL;
214       break;
215 
216     case Token::ARRAY_BEGIN:
217       {
218         json_pos_ += token.length;
219         token = ParseToken();
220 
221         node.reset(new ListValue());
222         while (token.type != Token::ARRAY_END) {
223           Value* array_node = BuildValue(false);
224           if (!array_node)
225             return NULL;
226           static_cast<ListValue*>(node.get())->Append(array_node);
227 
228           // After a list value, we expect a comma or the end of the list.
229           token = ParseToken();
230           if (token.type == Token::LIST_SEPARATOR) {
231             json_pos_ += token.length;
232             token = ParseToken();
233             // Trailing commas are invalid according to the JSON RFC, but some
234             // consumers need the parsing leniency, so handle accordingly.
235             if (token.type == Token::ARRAY_END) {
236               if (!allow_trailing_comma_) {
237                 SetErrorMessage(kTrailingComma, json_pos_);
238                 return NULL;
239               }
240               // Trailing comma OK, stop parsing the Array.
241               break;
242             }
243           } else if (token.type != Token::ARRAY_END) {
244             // Unexpected value after list value.  Bail out.
245             return NULL;
246           }
247         }
248         if (token.type != Token::ARRAY_END) {
249           return NULL;
250         }
251         break;
252       }
253 
254     case Token::OBJECT_BEGIN:
255       {
256         json_pos_ += token.length;
257         token = ParseToken();
258 
259         node.reset(new DictionaryValue);
260         while (token.type != Token::OBJECT_END) {
261           if (token.type != Token::STRING) {
262             SetErrorMessage(kUnquotedDictionaryKey, json_pos_);
263             return NULL;
264           }
265           scoped_ptr<Value> dict_key_value(DecodeString(token));
266           if (!dict_key_value.get())
267             return NULL;
268 
269           // Convert the key into a wstring.
270           std::wstring dict_key;
271           bool success = dict_key_value->GetAsString(&dict_key);
272           DCHECK(success);
273 
274           json_pos_ += token.length;
275           token = ParseToken();
276           if (token.type != Token::OBJECT_PAIR_SEPARATOR)
277             return NULL;
278 
279           json_pos_ += token.length;
280           token = ParseToken();
281           Value* dict_value = BuildValue(false);
282           if (!dict_value)
283             return NULL;
284           static_cast<DictionaryValue*>(node.get())->SetWithoutPathExpansion(
285               dict_key, dict_value);
286 
287           // After a key/value pair, we expect a comma or the end of the
288           // object.
289           token = ParseToken();
290           if (token.type == Token::LIST_SEPARATOR) {
291             json_pos_ += token.length;
292             token = ParseToken();
293             // Trailing commas are invalid according to the JSON RFC, but some
294             // consumers need the parsing leniency, so handle accordingly.
295             if (token.type == Token::OBJECT_END) {
296               if (!allow_trailing_comma_) {
297                 SetErrorMessage(kTrailingComma, json_pos_);
298                 return NULL;
299               }
300               // Trailing comma OK, stop parsing the Object.
301               break;
302             }
303           } else if (token.type != Token::OBJECT_END) {
304             // Unexpected value after last object value.  Bail out.
305             return NULL;
306           }
307         }
308         if (token.type != Token::OBJECT_END)
309           return NULL;
310 
311         break;
312       }
313 
314     default:
315       // We got a token that's not a value.
316       return NULL;
317   }
318   json_pos_ += token.length;
319 
320   --stack_depth_;
321   return node.release();
322 }
323 
ParseNumberToken()324 JSONReader::Token JSONReader::ParseNumberToken() {
325   // We just grab the number here.  We validate the size in DecodeNumber.
326   // According   to RFC4627, a valid number is: [minus] int [frac] [exp]
327   Token token(Token::NUMBER, json_pos_, 0);
328   wchar_t c = *json_pos_;
329   if ('-' == c) {
330     ++token.length;
331     c = token.NextChar();
332   }
333 
334   if (!ReadInt(token, false))
335     return kInvalidToken;
336 
337   // Optional fraction part
338   c = token.NextChar();
339   if ('.' == c) {
340     ++token.length;
341     if (!ReadInt(token, true))
342       return kInvalidToken;
343     c = token.NextChar();
344   }
345 
346   // Optional exponent part
347   if ('e' == c || 'E' == c) {
348     ++token.length;
349     c = token.NextChar();
350     if ('-' == c || '+' == c) {
351       ++token.length;
352       c = token.NextChar();
353     }
354     if (!ReadInt(token, true))
355       return kInvalidToken;
356   }
357 
358   return token;
359 }
360 
DecodeNumber(const Token & token)361 Value* JSONReader::DecodeNumber(const Token& token) {
362   const std::wstring num_string(token.begin, token.length);
363 
364   int num_int;
365   if (StringToInt(WideToUTF16Hack(num_string), &num_int))
366     return Value::CreateIntegerValue(num_int);
367 
368   double num_double;
369   if (StringToDouble(WideToUTF16Hack(num_string), &num_double) &&
370       base::IsFinite(num_double))
371     return Value::CreateRealValue(num_double);
372 
373   return NULL;
374 }
375 
ParseStringToken()376 JSONReader::Token JSONReader::ParseStringToken() {
377   Token token(Token::STRING, json_pos_, 1);
378   wchar_t c = token.NextChar();
379   while ('\0' != c) {
380     if ('\\' == c) {
381       ++token.length;
382       c = token.NextChar();
383       // Make sure the escaped char is valid.
384       switch (c) {
385         case 'x':
386           if (!ReadHexDigits(token, 2)) {
387             SetErrorMessage(kInvalidEscape, json_pos_ + token.length);
388             return kInvalidToken;
389           }
390           break;
391         case 'u':
392           if (!ReadHexDigits(token, 4)) {
393             SetErrorMessage(kInvalidEscape, json_pos_ + token.length);
394             return kInvalidToken;
395           }
396           break;
397         case '\\':
398         case '/':
399         case 'b':
400         case 'f':
401         case 'n':
402         case 'r':
403         case 't':
404         case 'v':
405         case '"':
406           break;
407         default:
408           SetErrorMessage(kInvalidEscape, json_pos_ + token.length);
409           return kInvalidToken;
410       }
411     } else if ('"' == c) {
412       ++token.length;
413       return token;
414     }
415     ++token.length;
416     c = token.NextChar();
417   }
418   return kInvalidToken;
419 }
420 
DecodeString(const Token & token)421 Value* JSONReader::DecodeString(const Token& token) {
422   std::wstring decoded_str;
423   decoded_str.reserve(token.length - 2);
424 
425   for (int i = 1; i < token.length - 1; ++i) {
426     wchar_t c = *(token.begin + i);
427     if ('\\' == c) {
428       ++i;
429       c = *(token.begin + i);
430       switch (c) {
431         case '"':
432         case '/':
433         case '\\':
434           decoded_str.push_back(c);
435           break;
436         case 'b':
437           decoded_str.push_back('\b');
438           break;
439         case 'f':
440           decoded_str.push_back('\f');
441           break;
442         case 'n':
443           decoded_str.push_back('\n');
444           break;
445         case 'r':
446           decoded_str.push_back('\r');
447           break;
448         case 't':
449           decoded_str.push_back('\t');
450           break;
451         case 'v':
452           decoded_str.push_back('\v');
453           break;
454 
455         case 'x':
456           decoded_str.push_back((HexToInt(*(token.begin + i + 1)) << 4) +
457                                 HexToInt(*(token.begin + i + 2)));
458           i += 2;
459           break;
460         case 'u':
461           decoded_str.push_back((HexToInt(*(token.begin + i + 1)) << 12 ) +
462                                 (HexToInt(*(token.begin + i + 2)) << 8) +
463                                 (HexToInt(*(token.begin + i + 3)) << 4) +
464                                 HexToInt(*(token.begin + i + 4)));
465           i += 4;
466           break;
467 
468         default:
469           // We should only have valid strings at this point.  If not,
470           // ParseStringToken didn't do it's job.
471           NOTREACHED();
472           return NULL;
473       }
474     } else {
475       // Not escaped
476       decoded_str.push_back(c);
477     }
478   }
479   return Value::CreateStringValue(decoded_str);
480 }
481 
ParseToken()482 JSONReader::Token JSONReader::ParseToken() {
483   static const std::wstring kNullString(L"null");
484   static const std::wstring kTrueString(L"true");
485   static const std::wstring kFalseString(L"false");
486 
487   EatWhitespaceAndComments();
488 
489   Token token(Token::INVALID_TOKEN, 0, 0);
490   switch (*json_pos_) {
491     case '\0':
492       token.type = Token::END_OF_INPUT;
493       break;
494 
495     case 'n':
496       if (NextStringMatch(kNullString))
497         token = Token(Token::NULL_TOKEN, json_pos_, 4);
498       break;
499 
500     case 't':
501       if (NextStringMatch(kTrueString))
502         token = Token(Token::BOOL_TRUE, json_pos_, 4);
503       break;
504 
505     case 'f':
506       if (NextStringMatch(kFalseString))
507         token = Token(Token::BOOL_FALSE, json_pos_, 5);
508       break;
509 
510     case '[':
511       token = Token(Token::ARRAY_BEGIN, json_pos_, 1);
512       break;
513 
514     case ']':
515       token = Token(Token::ARRAY_END, json_pos_, 1);
516       break;
517 
518     case ',':
519       token = Token(Token::LIST_SEPARATOR, json_pos_, 1);
520       break;
521 
522     case '{':
523       token = Token(Token::OBJECT_BEGIN, json_pos_, 1);
524       break;
525 
526     case '}':
527       token = Token(Token::OBJECT_END, json_pos_, 1);
528       break;
529 
530     case ':':
531       token = Token(Token::OBJECT_PAIR_SEPARATOR, json_pos_, 1);
532       break;
533 
534     case '0':
535     case '1':
536     case '2':
537     case '3':
538     case '4':
539     case '5':
540     case '6':
541     case '7':
542     case '8':
543     case '9':
544     case '-':
545       token = ParseNumberToken();
546       break;
547 
548     case '"':
549       token = ParseStringToken();
550       break;
551   }
552   return token;
553 }
554 
NextStringMatch(const std::wstring & str)555 bool JSONReader::NextStringMatch(const std::wstring& str) {
556   for (size_t i = 0; i < str.length(); ++i) {
557     if ('\0' == *json_pos_)
558       return false;
559     if (*(json_pos_ + i) != str[i])
560       return false;
561   }
562   return true;
563 }
564 
EatWhitespaceAndComments()565 void JSONReader::EatWhitespaceAndComments() {
566   while ('\0' != *json_pos_) {
567     switch (*json_pos_) {
568       case ' ':
569       case '\n':
570       case '\r':
571       case '\t':
572         ++json_pos_;
573         break;
574       case '/':
575         // TODO(tc): This isn't in the RFC so it should be a parser flag.
576         if (!EatComment())
577           return;
578         break;
579       default:
580         // Not a whitespace char, just exit.
581         return;
582     }
583   }
584 }
585 
EatComment()586 bool JSONReader::EatComment() {
587   if ('/' != *json_pos_)
588     return false;
589 
590   wchar_t next_char = *(json_pos_ + 1);
591   if ('/' == next_char) {
592     // Line comment, read until \n or \r
593     json_pos_ += 2;
594     while ('\0' != *json_pos_) {
595       switch (*json_pos_) {
596         case '\n':
597         case '\r':
598           ++json_pos_;
599           return true;
600         default:
601           ++json_pos_;
602       }
603     }
604   } else if ('*' == next_char) {
605     // Block comment, read until */
606     json_pos_ += 2;
607     while ('\0' != *json_pos_) {
608       if ('*' == *json_pos_ && '/' == *(json_pos_ + 1)) {
609         json_pos_ += 2;
610         return true;
611       }
612       ++json_pos_;
613     }
614   } else {
615     return false;
616   }
617   return true;
618 }
619 
SetErrorMessage(const char * description,const wchar_t * error_pos)620 void JSONReader::SetErrorMessage(const char* description,
621                                  const wchar_t* error_pos) {
622   int line_number = 1;
623   int column_number = 1;
624 
625   // Figure out the line and column the error occured at.
626   for (const wchar_t* pos = start_pos_; pos != error_pos; ++pos) {
627     if (*pos == '\0') {
628       NOTREACHED();
629       return;
630     }
631 
632     if (*pos == '\n') {
633       ++line_number;
634       column_number = 1;
635     } else {
636       ++column_number;
637     }
638   }
639 
640   error_message_ = FormatErrorMessage(line_number, column_number, description);
641 }
642 
643 }  // namespace base
644