• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef BASE_JSON_JSON_PARSER_H_
6 #define BASE_JSON_JSON_PARSER_H_
7 
8 #include <stddef.h>
9 #include <stdint.h>
10 
11 #include <memory>
12 #include <optional>
13 #include <string>
14 #include <string_view>
15 
16 #include "base/compiler_specific.h"
17 #include "base/gtest_prod_util.h"
18 #include "base/json/json_reader.h"
19 #include "base/macros.h"
20 
21 namespace base {
22 
23 class Value;  // Forward declaration; this header only names the type.
24 
25 namespace internal {
26 
27 class JSONParserTest;  // Forward-declared so JSONParser can befriend it.
28 
29 // The implementation behind the JSONReader interface. This class is not meant
30 // to be used directly; it encapsulates logic that need not be exposed publicly.
31 //
32 // This parser guarantees O(n) time through the input string. Iteration happens
33 // on the byte level, with the functions ConsumeChars() and ConsumeChar(). The
34 // conversion from byte to JSON token happens without advancing the parser in
35 // GetNextToken/ParseToken; that is, tokenization operates on the current
36 // parser position without advancing.
37 //
38 // Built on top of these are a family of Consume functions that iterate
39 // internally. Invariant: on entry of a Consume function, the parser is wound
40 // to the first byte of a valid JSON token. On exit, it is on the first byte
41 // after the token that was just consumed, which would likely be the first byte
42 // of the next token.
43 class JSONParser {
44  public:
45   JSONParser(int options, int max_depth = JSONReader::kStackMaxDepth);
46   ~JSONParser();
47 
48   // Parses the input string according to the set options and returns the
49   // result as a Value.
50   // Wrap this in base::FooValue::From() to check the Value is of type Foo and
51   // convert to a FooValue at the same time.
52   std::optional<Value> Parse(std::string_view input);
53 
54   // Returns the error code.
55   JSONReader::JsonParseError error_code() const;
56 
57   // Returns the human-friendly error message.
58   std::string GetErrorMessage() const;
59 
60   // Returns the error line number if parse error happened. Otherwise always
61   // returns 0.
62   int error_line() const;
63 
64   // Returns the error column number if parse error happened. Otherwise always
65   // returns 0.
66   int error_column() const;
67 
68  private:
69   enum Token {
70     T_OBJECT_BEGIN,  // {
71     T_OBJECT_END,    // }
72     T_ARRAY_BEGIN,   // [
73     T_ARRAY_END,     // ]
74     T_STRING,
75     T_NUMBER,
76     T_BOOL_TRUE,              // true
77     T_BOOL_FALSE,             // false
78     T_NULL,                   // null
79     T_LIST_SEPARATOR,         // ,
80     T_OBJECT_PAIR_SEPARATOR,  // :
81     T_END_OF_INPUT,
82     T_INVALID_TOKEN,
83   };
84 
85   // A helper class used for parsing strings. One optimization performed is to
86   // create base::Value with a std::string_view to avoid unnecessary std::string
87   // copies. This is not possible if the input string needs to be decoded from
88   // UTF-16 to UTF-8, or if an escape sequence causes characters to be skipped.
89   // This class centralizes that logic.
90   class StringBuilder {
91    public:
92     // Empty constructor. Used for creating a builder with which to assign to.
93     StringBuilder();
94 
95     // |pos| is the beginning of an input string, excluding the |"|.
96     explicit StringBuilder(const char* pos);
97 
98     ~StringBuilder();
99 
100     StringBuilder& operator=(StringBuilder&& other);
101 
102     // Appends the Unicode code point |point| to the string, either by
103     // increasing the |length_| of the string if the string has not been
104     // converted, or by appending the UTF8 bytes for the code point.
105     void Append(uint32_t point);
106 
107     // Converts the builder from its default std::string_view to a full
108     // std::string, performing a copy. Once a builder is converted, it cannot be
109     // made a std::string_view again.
110     void Convert();
111 
112     // Returns the builder as a string, invalidating all state. This allows
113     // the internal string buffer representation to be destructively moved
114     // in cases where the builder will not be needed any more.
115     std::string DestructiveAsString();
116 
117    private:
118     // The beginning of the input string.
119     const char* pos_;
120 
121     // Number of bytes in |pos_| that make up the string being built.
122     size_t length_;
123 
124     // The copied string representation. Will be unset until Convert() is
125     // called.
126     std::optional<std::string> string_;
127   };
128 
129   // Returns the next |count| bytes of the input stream, or nullopt if fewer
130   // than |count| bytes remain.
131   std::optional<std::string_view> PeekChars(int count);
132 
133   // Calls PeekChars() with a |count| of 1.
134   std::optional<char> PeekChar();
135 
136   // Returns the next |count| bytes of the input stream, or nullopt if fewer
137   // than |count| bytes remain, and advances the parser position by |count|.
138   std::optional<std::string_view> ConsumeChars(int count);
139 
140   // Calls ConsumeChars() with a |count| of 1.
141   std::optional<char> ConsumeChar();
142 
143   // Returns a pointer to the current character position.
144   const char* pos();
145 
146   // Skips over whitespace and comments to find the next token in the stream.
147   // This does not advance the parser for non-whitespace or comment chars.
148   Token GetNextToken();
149 
150   // Consumes whitespace characters and comments until the next non-that is
151   // encountered.
152   void EatWhitespaceAndComments();
153   // Helper function that consumes a comment, assuming that the parser is
154   // currently wound to a '/'.
155   bool EatComment();
156 
157   // Calls GetNextToken() and then ParseToken().
158   std::optional<Value> ParseNextToken();
159 
160   // Takes a token that represents the start of a Value ("a structural token"
161   // in RFC terms) and consumes it, returning the result as a Value.
162   std::optional<Value> ParseToken(Token token);
163 
164   // Assuming that the parser is currently wound to '{', this parses a JSON
165   // object into a Value.
166   std::optional<Value> ConsumeDictionary();
167 
168   // Assuming that the parser is wound to '[', this parses a JSON list into a
169   // Value.
170   std::optional<Value> ConsumeList();
171 
172   // Calls through ConsumeStringRaw and wraps it in a value.
173   std::optional<Value> ConsumeString();
174 
175   // Assuming that the parser is wound to a double quote, this parses a string,
176   // decoding any escape sequences and converts UTF-16 to UTF-8. Returns true on
177   // success and places result into |out|. Returns false on failure with
178   // error information set.
179   bool ConsumeStringRaw(StringBuilder* out);
180   // Helper function for ConsumeStringRaw() that consumes the next four or 10
181   // bytes (parser is wound to the first character of a HEX sequence, with the
182   // potential for consuming another \uXXXX for a surrogate). Returns true on
183   // success and places the code point |out_code_point|, and false on failure.
184   bool DecodeUTF16(uint32_t* out_code_point);
185 
186   // Assuming that the parser is wound to the start of a valid JSON number,
187   // this parses and converts it to either an int or double value.
188   std::optional<Value> ConsumeNumber();
189   // Helper that reads characters that are ints. Returns true if a number was
190   // read and false on error.
191   bool ReadInt(bool allow_leading_zeros);
192 
193   // Consumes the literal values of |true|, |false|, and |null|, assuming the
194   // parser is wound to the first character of any of those.
195   std::optional<Value> ConsumeLiteral();
196 
197   // Helper function that returns true if the byte squence |match| can be
198   // consumed at the current parser position. Returns false if there are fewer
199   // than |match|-length bytes or if the sequence does not match, and the
200   // parser state is unchanged.
201   bool ConsumeIfMatch(std::string_view match);
202 
203   // Sets the error information to |code| at the current column, based on
204   // |index_| and |index_last_line_|, with an optional positive/negative
205   // adjustment by |column_adjust|.
206   void ReportError(JSONReader::JsonParseError code, int column_adjust);
207 
208   // Given the line and column number of an error, formats one of the error
209   // message contants from json_reader.h for human display.
210   static std::string FormatErrorMessage(int line,
211                                         int column,
212                                         const std::string& description);
213 
214   // base::JSONParserOptions that control parsing.
215   const int options_;
216 
217   // Maximum depth to parse.
218   const int max_depth_;
219 
220   // The input stream being parsed. Note: Not guaranteed to NUL-terminated.
221   std::string_view input_;
222 
223   // The index in the input stream to which the parser is wound.
224   int index_;
225 
226   // The number of times the parser has recursed (current stack depth).
227   int stack_depth_;
228 
229   // The line number that the parser is at currently.
230   int line_number_;
231 
232   // The last value of |index_| on the previous line.
233   int index_last_line_;
234 
235   // Error information.
236   JSONReader::JsonParseError error_code_;
237   int error_line_;
238   int error_column_;
239 
240   friend class JSONParserTest;
241   FRIEND_TEST_ALL_PREFIXES(JSONParserTest, NextChar);
242   FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeDictionary);
243   FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeList);
244   FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeString);
245   FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeLiterals);
246   FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeNumbers);
247   FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ErrorMessages);
248   FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ReplaceInvalidCharacters);
249   FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ReplaceInvalidUTF16EscapeSequence);
250 
251   DISALLOW_COPY_AND_ASSIGN(JSONParser);
252 };
253 
254 // Used while decoding when an invalid UTF-8 sequence is encountered.
255 extern const char kUnicodeReplacementString[];
256 
257 }  // namespace internal
258 }  // namespace base
259 
260 #endif  // BASE_JSON_JSON_PARSER_H_
261