• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 //
5 // A JSON parser.  Converts strings of JSON into a Value object (see
6 // base/values.h).
7 // http://www.ietf.org/rfc/rfc4627.txt?number=4627
8 //
9 // Known limitations/deviations from the RFC:
10 // - Only knows how to parse ints within the range of a signed 32 bit int and
11 //   decimal numbers within a double.
12 // - Assumes input is encoded as UTF8.  The spec says we should allow UTF-16
13 //   (BE or LE) and UTF-32 (BE or LE) as well.
14 // - We limit nesting to 100 levels to prevent stack overflow (this is allowed
15 //   by the RFC).
// - A Unicode FAQ ("http://unicode.org/faq/utf_bom.html") states that a data
//   stream may start with a Unicode Byte-Order-Mark (U+FEFF), i.e. the input
//   UTF-8 string for the JSONReader::JsonToValue() function may start with a
//   UTF-8 BOM (0xEF, 0xBB, 0xBF).
//   To keep the function from mistaking a UTF-8 BOM for an invalid
//   character, the function skips a Unicode BOM at the beginning of the
//   Unicode string (converted from the input UTF-8 string) before parsing it.
23 //
// TODO(tc): Add a parsing option to relax the requirement that object keys be
//   wrapped in double quotes
26 // TODO(tc): Add an option to disable comment stripping
27 // TODO(aa): Consider making the constructor public and the static Read() method
28 // only a convenience for the common uses with more complex configuration going
29 // on the instance.
30 
31 #ifndef BASE_JSON_JSON_READER_H_
32 #define BASE_JSON_JSON_READER_H_
33 #pragma once
34 
35 #include <string>
36 
37 #include "base/base_api.h"
38 #include "base/basictypes.h"
39 
40 // Chromium and Chromium OS check out gtest to different places, so we're
41 // unable to compile on both if we include gtest_prod.h here.  Instead, include
42 // its only contents -- this will need to be updated if the macro ever changes.
43 #define FRIEND_TEST(test_case_name, test_name)\
44 friend class test_case_name##_##test_name##_Test
45 
46 class Value;
47 
48 namespace base {
49 
50 class BASE_API JSONReader {
51  public:
52   // A struct to hold a JS token.
53   class Token {
54    public:
55     enum Type {
56      OBJECT_BEGIN,           // {
57      OBJECT_END,             // }
58      ARRAY_BEGIN,            // [
59      ARRAY_END,              // ]
60      STRING,
61      NUMBER,
62      BOOL_TRUE,              // true
63      BOOL_FALSE,             // false
64      NULL_TOKEN,             // null
65      LIST_SEPARATOR,         // ,
66      OBJECT_PAIR_SEPARATOR,  // :
67      END_OF_INPUT,
68      INVALID_TOKEN,
69     };
Token(Type t,const wchar_t * b,int len)70     Token(Type t, const wchar_t* b, int len)
71       : type(t), begin(b), length(len) {}
72 
73     // Get the character that's one past the end of this token.
NextChar()74     wchar_t NextChar() {
75       return *(begin + length);
76     }
77 
78     Type type;
79 
80     // A pointer into JSONReader::json_pos_ that's the beginning of this token.
81     const wchar_t* begin;
82 
83     // End should be one char past the end of the token.
84     int length;
85   };
86 
87   // Error codes during parsing.
88   enum JsonParseError {
89     JSON_NO_ERROR = 0,
90     JSON_BAD_ROOT_ELEMENT_TYPE,
91     JSON_INVALID_ESCAPE,
92     JSON_SYNTAX_ERROR,
93     JSON_TRAILING_COMMA,
94     JSON_TOO_MUCH_NESTING,
95     JSON_UNEXPECTED_DATA_AFTER_ROOT,
96     JSON_UNSUPPORTED_ENCODING,
97     JSON_UNQUOTED_DICTIONARY_KEY,
98   };
99 
100   // String versions of parse error codes.
101   static const char* kBadRootElementType;
102   static const char* kInvalidEscape;
103   static const char* kSyntaxError;
104   static const char* kTrailingComma;
105   static const char* kTooMuchNesting;
106   static const char* kUnexpectedDataAfterRoot;
107   static const char* kUnsupportedEncoding;
108   static const char* kUnquotedDictionaryKey;
109 
110   JSONReader();
111 
112   // Reads and parses |json|, returning a Value. The caller owns the returned
113   // instance. If |json| is not a properly formed JSON string, returns NULL.
114   // If |allow_trailing_comma| is true, we will ignore trailing commas in
115   // objects and arrays even though this goes against the RFC.
116   static Value* Read(const std::string& json, bool allow_trailing_comma);
117 
118   // Reads and parses |json| like Read(). |error_code_out| and |error_msg_out|
119   // are optional. If specified and NULL is returned, they will be populated
120   // an error code and a formatted error message (including error location if
121   // appropriate). Otherwise, they will be unmodified.
122   static Value* ReadAndReturnError(const std::string& json,
123                                    bool allow_trailing_comma,
124                                    int* error_code_out,
125                                    std::string* error_msg_out);
126 
127   // Converts a JSON parse error code into a human readable message.
128   // Returns an empty string if error_code is JSON_NO_ERROR.
129   static std::string ErrorCodeToString(JsonParseError error_code);
130 
131   // Returns the error code if the last call to JsonToValue() failed.
132   // Returns JSON_NO_ERROR otherwise.
error_code()133   JsonParseError error_code() const { return error_code_; }
134 
135   // Converts error_code_ to a human-readable string, including line and column
136   // numbers if appropriate.
137   std::string GetErrorMessage() const;
138 
139   // Reads and parses |json|, returning a Value. The caller owns the returned
140   // instance. If |json| is not a properly formed JSON string, returns NULL and
141   // a detailed error can be retrieved from |error_message()|.
142   // If |check_root| is true, we require that the root object be an object or
143   // array. Otherwise, it can be any valid JSON type.
144   // If |allow_trailing_comma| is true, we will ignore trailing commas in
145   // objects and arrays even though this goes against the RFC.
146   Value* JsonToValue(const std::string& json, bool check_root,
147                      bool allow_trailing_comma);
148 
149  private:
150   FRIEND_TEST(JSONReaderTest, Reading);
151   FRIEND_TEST(JSONReaderTest, ErrorMessages);
152 
153   static std::string FormatErrorMessage(int line, int column,
154                                         const std::string& description);
155 
156   // Recursively build Value.  Returns NULL if we don't have a valid JSON
157   // string.  If |is_root| is true, we verify that the root element is either
158   // an object or an array.
159   Value* BuildValue(bool is_root);
160 
161   // Parses a sequence of characters into a Token::NUMBER. If the sequence of
162   // characters is not a valid number, returns a Token::INVALID_TOKEN. Note
163   // that DecodeNumber is used to actually convert from a string to an
164   // int/double.
165   Token ParseNumberToken();
166 
167   // Try and convert the substring that token holds into an int or a double. If
168   // we can (ie., no overflow), return the value, else return NULL.
169   Value* DecodeNumber(const Token& token);
170 
171   // Parses a sequence of characters into a Token::STRING. If the sequence of
172   // characters is not a valid string, returns a Token::INVALID_TOKEN. Note
173   // that DecodeString is used to actually decode the escaped string into an
174   // actual wstring.
175   Token ParseStringToken();
176 
177   // Convert the substring into a value string.  This should always succeed
178   // (otherwise ParseStringToken would have failed).
179   Value* DecodeString(const Token& token);
180 
181   // Grabs the next token in the JSON stream.  This does not increment the
182   // stream so it can be used to look ahead at the next token.
183   Token ParseToken();
184 
185   // Increments |json_pos_| past leading whitespace and comments.
186   void EatWhitespaceAndComments();
187 
188   // If |json_pos_| is at the start of a comment, eat it, otherwise, returns
189   // false.
190   bool EatComment();
191 
192   // Checks if |json_pos_| matches str.
193   bool NextStringMatch(const std::wstring& str);
194 
195   // Sets the error code that will be returned to the caller. The current
196   // line and column are determined and added into the final message.
197   void SetErrorCode(const JsonParseError error, const wchar_t* error_pos);
198 
199   // Pointer to the starting position in the input string.
200   const wchar_t* start_pos_;
201 
202   // Pointer to the current position in the input string.
203   const wchar_t* json_pos_;
204 
205   // Used to keep track of how many nested lists/dicts there are.
206   int stack_depth_;
207 
208   // A parser flag that allows trailing commas in objects and arrays.
209   bool allow_trailing_comma_;
210 
211   // Contains the error code for the last call to JsonToValue(), if any.
212   JsonParseError error_code_;
213   int error_line_;
214   int error_col_;
215 
216   DISALLOW_COPY_AND_ASSIGN(JSONReader);
217 };
218 
219 }  // namespace base
220 
221 #endif  // BASE_JSON_JSON_READER_H_
222