• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef BASE_JSON_JSON_PARSER_H_
6 #define BASE_JSON_JSON_PARSER_H_
7 
8 #include <stddef.h>
9 #include <stdint.h>
10 
11 #include <string>
12 
13 #include "base/base_export.h"
14 #include "base/compiler_specific.h"
15 #include "base/gtest_prod_util.h"
16 #include "base/json/json_reader.h"
17 #include "base/macros.h"
18 #include "base/strings/string_piece.h"
19 
20 namespace base {
21 
22 class Value;
23 
24 namespace internal {
25 
26 class JSONParserTest;
27 
28 // The implementation behind the JSONReader interface. This class is not meant
29 // to be used directly; it encapsulates logic that need not be exposed publicly.
30 //
31 // This parser guarantees O(n) time through the input string. It also optimizes
32 // base::StringValue by using StringPiece where possible when returning Value
33 // objects by using "hidden roots," discussed in the implementation.
34 //
35 // Iteration happens on the byte level, with the functions CanConsume and
36 // NextChar. The conversion from byte to JSON token happens without advancing
37 // the parser in GetNextToken/ParseToken; that is, tokenization operates on
38 // the current parser position without advancing.
39 //
40 // Built on top of these are a family of Consume functions that iterate
41 // internally. Invariant: on entry of a Consume function, the parser is wound
42 // to the first byte of a valid JSON token. On exit, it is on the last byte
43 // of a token, such that the next iteration of the parser will be at the byte
44 // immediately following the token, which would likely be the first byte of the
45 // next token.
46 class BASE_EXPORT JSONParser {
47  public:
48   explicit JSONParser(int options);  // |options|: see |options_| below.
49   ~JSONParser();
50 
51   // Parses |input| per the constructor's options; the caller owns the returned
52   // Value. After a failed parse, the error accessors below describe the error.
53   Value* Parse(const StringPiece& input);
54 
55   // Returns the error code (presumably |error_code_|, set via ReportError).
56   JSONReader::JsonParseError error_code() const;
57 
58   // Returns the human-friendly error message (cf. FormatErrorMessage below).
59   std::string GetErrorMessage() const;
60 
61   // Returns the error line number if a parse error happened. Otherwise always
62   // returns 0.
63   int error_line() const;
64 
65   // Returns the error column number if a parse error happened. Otherwise always
66   // returns 0.
67   int error_column() const;
68 
69  private:
70   enum Token {  // Token kinds; GetNextToken() returns one, ParseToken() takes one.
71     T_OBJECT_BEGIN,           // {
72     T_OBJECT_END,             // }
73     T_ARRAY_BEGIN,            // [
74     T_ARRAY_END,              // ]
75     T_STRING,                 // begins with " (see ConsumeStringRaw)
76     T_NUMBER,                 // see ConsumeNumber
77     T_BOOL_TRUE,              // true
78     T_BOOL_FALSE,             // false
79     T_NULL,                   // null
80     T_LIST_SEPARATOR,         // ,
81     T_OBJECT_PAIR_SEPARATOR,  // :
82     T_END_OF_INPUT,           // presumably: no input left -- confirm in the .cc
83     T_INVALID_TOKEN,          // presumably: none of the above -- confirm in the .cc
84   };
85 
86   // A helper class used for parsing strings. One optimization performed is to
87   // create base::Value with a StringPiece to avoid unnecessary std::string
88   // copies. This is not possible if the input string needs to be decoded from
89   // UTF-16 to UTF-8, or if an escape sequence causes characters to be skipped.
90   // This class centralizes that logic.
91   class StringBuilder {
92    public:
93     // Empty constructor. Used for creating a builder with which to Swap().
94     StringBuilder();
95 
96     // |pos| is the beginning of an input string, excluding the |"|.
97     explicit StringBuilder(const char* pos);
98 
99     ~StringBuilder();
100 
101     // Swaps the contents of |other| with this.
102     void Swap(StringBuilder* other);
103 
104     // Either increases the |length_| of the string or copies the character if
105     // the StringBuilder has been converted. |c| must be in the basic ASCII
106     // plane; all other characters need to be in UTF-8 units, appended with
107     // AppendString below.
108     void Append(const char& c);
109 
110     // Appends |str| to the std::string. The builder must be Convert()ed first.
111     void AppendString(const std::string& str);
112 
113     // Converts the builder from its default StringPiece to a full std::string,
114     // performing a copy. Once a builder is converted, it cannot be made a
115     // StringPiece again.
116     void Convert();
117 
118     // Returns whether the builder can be converted to a StringPiece.
119     bool CanBeStringPiece() const;
120 
121     // Returns the StringPiece representation. Returns an empty piece if it
122     // cannot be converted.
123     StringPiece AsStringPiece();
124 
125     // Returns the builder as a std::string.
126     const std::string& AsString();
127 
128    private:
129     // The beginning of the input string.
130     const char* pos_;
131 
132     // Number of bytes in |pos_| that make up the string being built.
133     size_t length_;
134 
135     // The copied string representation. NULL until Convert() is called.
136     // Strong (owning) raw pointer; scoped_ptr<T> has too much overhead here.
137     std::string* string_;
138   };
139 
140   // Quick check that the stream has capacity to consume |length| more bytes.
141   bool CanConsume(int length);
142 
143   // The basic way to consume a single character in the stream. Consumes one
144   // byte of the input stream and returns a pointer to the rest of it.
145   const char* NextChar();
146 
147   // Performs the equivalent of NextChar N times.
148   void NextNChars(int n);
149 
150   // Skips over whitespace and comments to find the next token in the stream.
151   // Does not advance past chars that are neither whitespace nor comments.
152   Token GetNextToken();
153 
154   // Consumes whitespace characters and comments until a character that is
155   // neither is encountered.
156   void EatWhitespaceAndComments();
157   // Helper function that consumes a comment, assuming that the parser is
158   // currently wound to a '/'.
159   bool EatComment();
160 
161   // Calls GetNextToken() and then ParseToken(). Caller owns the result.
162   Value* ParseNextToken();
163 
164   // Takes a token that represents the start of a Value ("a structural token"
165   // in RFC terms) and consumes it, returning the result as an object the
166   // caller owns.
167   Value* ParseToken(Token token);
168 
169   // Assuming that the parser is currently wound to '{', this parses a JSON
170   // object into a DictionaryValue.
171   Value* ConsumeDictionary();
172 
173   // Assuming that the parser is wound to '[', this parses a JSON list into a
174   // ListValue.
175   Value* ConsumeList();
176 
177   // Calls through to ConsumeStringRaw and wraps the result in a Value.
178   Value* ConsumeString();
179 
180   // Assuming that the parser is wound to a double quote, this parses a string,
181   // decoding any escape sequences and converts UTF-16 to UTF-8. Returns true on
182   // success and Swap()s the result into |out|. Returns false on failure with
183   // error information set.
184   bool ConsumeStringRaw(StringBuilder* out);
185   // Helper function for ConsumeStringRaw() that consumes the next four or 10
186   // bytes (parser is wound to the first character of a HEX sequence, with the
187   // potential for consuming another \uXXXX for a surrogate). Returns true on
188   // success and places the UTF8 code units in |dest_string|, and false on
189   // failure.
190   bool DecodeUTF16(std::string* dest_string);
191   // Helper function for ConsumeStringRaw() that takes a single code point,
192   // decodes it into UTF-8 units, and appends it to the given builder. The
193   // point must be valid.
194   void DecodeUTF8(const int32_t& point, StringBuilder* dest);
195 
196   // Assuming that the parser is wound to the start of a valid JSON number,
197   // this parses and converts it to either an int or double value.
198   Value* ConsumeNumber();
199   // Helper that reads characters that are ints. Returns true if a number was
200   // read and false on error.
201   bool ReadInt(bool allow_leading_zeros);
202 
203   // Consumes the literal values of |true|, |false|, and |null|, assuming the
204   // parser is wound to the first character of any of those.
205   Value* ConsumeLiteral();
206 
207   // Compares two string buffers of a given length.
208   static bool StringsAreEqual(const char* left, const char* right, size_t len);
209 
210   // Sets the error information to |code| at the current column, based on
211   // |index_| and |index_last_line_|, with an optional positive/negative
212   // adjustment by |column_adjust|.
213   void ReportError(JSONReader::JsonParseError code, int column_adjust);
214 
215   // Given the line and column number of an error, formats one of the error
216   // message constants from json_reader.h for human display.
217   static std::string FormatErrorMessage(int line, int column,
218                                         const std::string& description);
219 
220   // base::JSONParserOptions that control parsing.
221   int options_;
222 
223   // Pointer to the start of the input data.
224   const char* start_pos_;
225 
226   // Pointer to the current position in the input data. Equivalent to
227   // |start_pos_ + index_|.
228   const char* pos_;
229 
230   // Pointer to the last character of the input data.
231   const char* end_pos_;
232 
233   // The index in the input stream to which the parser is wound.
234   int index_;
235 
236   // The number of times the parser has recursed (current stack depth).
237   int stack_depth_;
238 
239   // The line number that the parser is at currently.
240   int line_number_;
241 
242   // The last value of |index_| on the previous line.
243   int index_last_line_;
244 
245   // Error information (see ReportError() and the public error accessors).
246   JSONReader::JsonParseError error_code_;
247   int error_line_;    // presumably what error_line() returns -- confirm in the .cc
248   int error_column_;  // presumably what error_column() returns -- confirm in the .cc
249 
250   friend class JSONParserTest;  // Grants the unit tests access to private members.
251   FRIEND_TEST_ALL_PREFIXES(JSONParserTest, NextChar);
252   FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeDictionary);
253   FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeList);
254   FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeString);
255   FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeLiterals);
256   FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeNumbers);
257   FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ErrorMessages);
258 
259   DISALLOW_COPY_AND_ASSIGN(JSONParser);  // Parser instances are not copyable.
260 };
261 
262 }  // namespace internal
263 }  // namespace base
264 
265 #endif  // BASE_JSON_JSON_PARSER_H_
266