• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// A JSON parser.  Converts strings of JSON into a Value object (see
// base/values.h).
// http://www.ietf.org/rfc/rfc4627.txt?number=4627
//
// Known limitations/deviations from the RFC:
// - Only knows how to parse ints within the range of a signed 32 bit int and
//   decimal numbers within a double.
// - Assumes input is encoded as UTF8.  The spec says we should allow UTF-16
//   (BE or LE) and UTF-32 (BE or LE) as well.
// - We limit nesting to 100 levels to prevent stack overflow (this is allowed
//   by the RFC).
// - A Unicode FAQ ("http://unicode.org/faq/utf_bom.html") states that a data
//   stream may start with a Unicode Byte-Order-Mark (U+FEFF), i.e. the input
//   UTF-8 string for the JSONReader::JsonToValue() function may start with a
//   UTF-8 BOM (0xEF, 0xBB, 0xBF).
//   To keep the function from treating a UTF-8 BOM as an invalid character,
//   the function skips a Unicode BOM at the beginning of the Unicode string
//   (converted from the input UTF-8 string) before parsing it.
//
// TODO(tc): Add a parsing option to relax object keys being wrapped in
//   double quotes
// TODO(tc): Add an option to disable comment stripping
// TODO(aa): Consider making the constructor public and the static Read() method
// only a convenience for the common uses with more complex configuration going
// on the instance.
30 
31 #ifndef BASE_JSON_JSON_READER_H_
32 #define BASE_JSON_JSON_READER_H_
33 
34 #include <string>
35 
36 #include "base/basictypes.h"
37 
// Chromium and Chromium OS check out gtest to different places, so we're
// unable to compile on both if we include gtest_prod.h here.  Instead, mirror
// its only contents -- this will need to be updated if the macro ever changes.
//
// FRIEND_TEST(test_case_name, test_name) expands to a friend declaration for
// the class test_case_name##_##test_name##_Test, e.g. FRIEND_TEST(Foo, Bar)
// befriends the class Foo_Bar_Test.
//
// The #ifndef guard keeps this copy from redefining the macro when the
// translation unit already has a definition of FRIEND_TEST (for example from
// gtest_prod.h itself).
#ifndef FRIEND_TEST
#define FRIEND_TEST(test_case_name, test_name)\
friend class test_case_name##_##test_name##_Test
#endif  // FRIEND_TEST
43 
44 class Value;
45 
46 namespace base {
47 
48 class JSONReader {
49  public:
50   // A struct to hold a JS token.
51   class Token {
52    public:
53     enum Type {
54      OBJECT_BEGIN,           // {
55      OBJECT_END,             // }
56      ARRAY_BEGIN,            // [
57      ARRAY_END,              // ]
58      STRING,
59      NUMBER,
60      BOOL_TRUE,              // true
61      BOOL_FALSE,             // false
62      NULL_TOKEN,             // null
63      LIST_SEPARATOR,         // ,
64      OBJECT_PAIR_SEPARATOR,  // :
65      END_OF_INPUT,
66      INVALID_TOKEN,
67     };
Token(Type t,const wchar_t * b,int len)68     Token(Type t, const wchar_t* b, int len)
69       : type(t), begin(b), length(len) {}
70 
71     Type type;
72 
73     // A pointer into JSONReader::json_pos_ that's the beginning of this token.
74     const wchar_t* begin;
75 
76     // End should be one char past the end of the token.
77     int length;
78 
79     // Get the character that's one past the end of this token.
NextChar()80     wchar_t NextChar() {
81       return *(begin + length);
82     }
83   };
84 
85   // Error messages that can be returned.
86   static const char* kBadRootElementType;
87   static const char* kInvalidEscape;
88   static const char* kSyntaxError;
89   static const char* kTrailingComma;
90   static const char* kTooMuchNesting;
91   static const char* kUnexpectedDataAfterRoot;
92   static const char* kUnsupportedEncoding;
93   static const char* kUnquotedDictionaryKey;
94 
95   JSONReader();
96 
97   // Reads and parses |json|, returning a Value. The caller owns the returned
98   // instance. If |json| is not a properly formed JSON string, returns NULL.
99   // If |allow_trailing_comma| is true, we will ignore trailing commas in
100   // objects and arrays even though this goes against the RFC.
101   static Value* Read(const std::string& json, bool allow_trailing_comma);
102 
103   // Reads and parses |json| like Read(). |error_message_out| is optional. If
104   // specified and NULL is returned, |error_message_out| will be populated with
105   // a string describing the error. Otherwise, |error_message_out| is
106   // unmodified.
107   static Value* ReadAndReturnError(const std::string& json,
108                                    bool allow_trailing_comma,
109                                    std::string* error_message_out);
110 
111   // Returns the error message if the last call to JsonToValue() failed. If the
112   // last call did not fail, returns a valid empty string.
error_message()113   std::string error_message() { return error_message_; }
114 
115   // Reads and parses |json|, returning a Value. The caller owns the returned
116   // instance. If |json| is not a properly formed JSON string, returns NULL and
117   // a detailed error can be retrieved from |error_message()|.
118   // If |check_root| is true, we require that the root object be an object or
119   // array. Otherwise, it can be any valid JSON type.
120   // If |allow_trailing_comma| is true, we will ignore trailing commas in
121   // objects and arrays even though this goes against the RFC.
122   Value* JsonToValue(const std::string& json, bool check_root,
123                      bool allow_trailing_comma);
124 
125  private:
126   static std::string FormatErrorMessage(int line, int column,
127                                         const char* description);
128 
129   DISALLOW_COPY_AND_ASSIGN(JSONReader);
130 
131   FRIEND_TEST(JSONReaderTest, Reading);
132   FRIEND_TEST(JSONReaderTest, ErrorMessages);
133 
134   // Recursively build Value.  Returns NULL if we don't have a valid JSON
135   // string.  If |is_root| is true, we verify that the root element is either
136   // an object or an array.
137   Value* BuildValue(bool is_root);
138 
139   // Parses a sequence of characters into a Token::NUMBER. If the sequence of
140   // characters is not a valid number, returns a Token::INVALID_TOKEN. Note
141   // that DecodeNumber is used to actually convert from a string to an
142   // int/double.
143   Token ParseNumberToken();
144 
145   // Try and convert the substring that token holds into an int or a double. If
146   // we can (ie., no overflow), return the value, else return NULL.
147   Value* DecodeNumber(const Token& token);
148 
149   // Parses a sequence of characters into a Token::STRING. If the sequence of
150   // characters is not a valid string, returns a Token::INVALID_TOKEN. Note
151   // that DecodeString is used to actually decode the escaped string into an
152   // actual wstring.
153   Token ParseStringToken();
154 
155   // Convert the substring into a value string.  This should always succeed
156   // (otherwise ParseStringToken would have failed).
157   Value* DecodeString(const Token& token);
158 
159   // Grabs the next token in the JSON stream.  This does not increment the
160   // stream so it can be used to look ahead at the next token.
161   Token ParseToken();
162 
163   // Increments |json_pos_| past leading whitespace and comments.
164   void EatWhitespaceAndComments();
165 
166   // If |json_pos_| is at the start of a comment, eat it, otherwise, returns
167   // false.
168   bool EatComment();
169 
170   // Checks if |json_pos_| matches str.
171   bool NextStringMatch(const std::wstring& str);
172 
173   // Creates the error message that will be returned to the caller. The current
174   // line and column are determined and added into the final message.
175   void SetErrorMessage(const char* description, const wchar_t* error_pos);
176 
177   // Pointer to the starting position in the input string.
178   const wchar_t* start_pos_;
179 
180   // Pointer to the current position in the input string.
181   const wchar_t* json_pos_;
182 
183   // Used to keep track of how many nested lists/dicts there are.
184   int stack_depth_;
185 
186   // A parser flag that allows trailing commas in objects and arrays.
187   bool allow_trailing_comma_;
188 
189   // Contains the error message for the last call to JsonToValue(), if any.
190   std::string error_message_;
191 };
192 
193 }  // namespace base
194 
195 #endif  // BASE_JSON_JSON_READER_H_
196