• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2012 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef BASE_JSON_JSON_PARSER_H_
6 #define BASE_JSON_JSON_PARSER_H_
7 
8 #include <stddef.h>
9 #include <stdint.h>
10 
11 #include <memory>
12 #include <optional>
13 #include <string>
14 #include <string_view>
15 #include <utility>
16 
17 #include "base/base_export.h"
18 #include "base/compiler_specific.h"
19 #include "base/gtest_prod_util.h"
20 #include "base/json/json_common.h"
21 #include "base/third_party/icu/icu_utf.h"
22 #include "base/values.h"
23 
24 namespace base {
25 
26 class Value;  // Forward declaration; used in the parser's return types.
27 
28 namespace internal {
29 
30 class JSONParserTest;  // Declared here so JSONParser can befriend it.
31 
32 // The implementation behind the JSONReader interface. This class is not meant
33 // to be used directly; it encapsulates logic that need not be exposed publicly.
34 //
35 // This parser guarantees O(n) time through the input string. Iteration happens
36 // on the byte level, with the functions ConsumeChars() and ConsumeChar(). The
37 // conversion from byte to JSON token happens without advancing the parser in
38 // GetNextToken/ParseToken, that is tokenization operates on the current parser
39 // position without advancing.
40 //
41 // Built on top of these are a family of Consume functions that iterate
42 // internally. Invariant: on entry of a Consume function, the parser is wound
43 // to the first byte of a valid JSON token. On exit, it is on the first byte
44 // after the token that was just consumed, which would likely be the first byte
45 // of the next token.
46 class BASE_EXPORT JSONParser {
47  public:
48   // Error codes during parsing; the first two alias ValueDeserializer codes.
49   enum JsonParseError {
50     JSON_NO_ERROR = base::ValueDeserializer::kErrorCodeNoError,
51     JSON_SYNTAX_ERROR = base::ValueDeserializer::kErrorCodeInvalidFormat,
52     JSON_INVALID_ESCAPE,
53     JSON_UNEXPECTED_TOKEN,
54     JSON_TRAILING_COMMA,
55     JSON_TOO_MUCH_NESTING,
56     JSON_UNEXPECTED_DATA_AFTER_ROOT,
57     JSON_UNSUPPORTED_ENCODING,
58     JSON_UNQUOTED_DICTIONARY_KEY,
59     JSON_UNREPRESENTABLE_NUMBER,
60     JSON_PARSE_ERROR_COUNT
61   };
62 
63   // String versions of parse error codes.
64   static const char kSyntaxError[];
65   static const char kInvalidEscape[];
66   static const char kUnexpectedToken[];
67   static const char kTrailingComma[];
68   static const char kTooMuchNesting[];
69   static const char kUnexpectedDataAfterRoot[];
70   static const char kUnsupportedEncoding[];
71   static const char kUnquotedDictionaryKey[];
72   static const char kUnrepresentableNumber[];
73 
74   explicit JSONParser(int options, size_t max_depth = kAbsoluteMaxDepth);
75 
76   JSONParser(const JSONParser&) = delete;
77   JSONParser& operator=(const JSONParser&) = delete;
78 
79   ~JSONParser();
80 
81   // Parses the input string according to the set options and returns the
82   // result as a Value.
83   // Wrap this in base::FooValue::From() to check the Value is of type Foo and
84   // convert to a FooValue at the same time.
85   std::optional<Value> Parse(std::string_view input);
86 
87   // Returns the error code.
88   JsonParseError error_code() const;
89 
90   // Returns the human-friendly error message.
91   std::string GetErrorMessage() const;
92 
93   // Returns the error line number if a parse error happened. Otherwise always
94   // returns 0.
95   int error_line() const;
96 
97   // Returns the error column number if a parse error happened. Otherwise always
98   // returns 0.
99   int error_column() const;
100 
101  private:
102   enum Token {
103     T_OBJECT_BEGIN,           // {
104     T_OBJECT_END,             // }
105     T_ARRAY_BEGIN,            // [
106     T_ARRAY_END,              // ]
107     T_STRING,
108     T_NUMBER,
109     T_BOOL_TRUE,              // true
110     T_BOOL_FALSE,             // false
111     T_NULL,                   // null
112     T_LIST_SEPARATOR,         // ,
113     T_OBJECT_PAIR_SEPARATOR,  // :
114     T_END_OF_INPUT,
115     T_INVALID_TOKEN,
116   };
117 
118   // Returns the next |count| bytes of the input stream, or nullopt if fewer
119   // than |count| bytes remain.
120   std::optional<std::string_view> PeekChars(size_t count);
121 
122   // Calls PeekChars() with a |count| of 1.
123   std::optional<char> PeekChar();
124 
125   // Returns the next |count| bytes of the input stream, or nullopt if fewer
126   // than |count| bytes remain, and advances the parser position by |count|.
127   std::optional<std::string_view> ConsumeChars(size_t count);
128 
129   // Calls ConsumeChars() with a |count| of 1.
130   std::optional<char> ConsumeChar();
131 
132   // Returns a pointer to the current character position.
133   const char* pos();
134 
135   // Skips over whitespace and comments to find the next token in the stream.
136   // This does not advance the parser for non-whitespace or comment chars.
137   Token GetNextToken();
138 
139   // Consumes whitespace characters and comments until a character that is
140   // neither is encountered.
141   void EatWhitespaceAndComments();
142   // Helper function that consumes a comment, assuming that the parser is
143   // currently wound to a '/'.
144   bool EatComment();
145 
146   // Calls GetNextToken() and then ParseToken().
147   std::optional<Value> ParseNextToken();
148 
149   // Takes a token that represents the start of a Value ("a structural token"
150   // in RFC terms) and consumes it, returning the result as a Value.
151   std::optional<Value> ParseToken(Token token);
152 
153   // Assuming that the parser is currently wound to '{', this parses a JSON
154   // object into a Value.
155   std::optional<Value> ConsumeDictionary();
156 
157   // Assuming that the parser is wound to '[', this parses a JSON list into a
158   // Value.
159   std::optional<Value> ConsumeList();
160 
161   // Calls through to ConsumeStringRaw() and wraps the result in a Value.
162   std::optional<Value> ConsumeString();
163 
164   // Assuming that the parser is wound to a double quote, this parses a string,
165   // decoding any escape sequences and validating UTF-8. Returns the string on
166   // success or std::nullopt on error, with error information set.
167   std::optional<std::string> ConsumeStringRaw();
168 
169   enum class StringResult {
170     // Parsing stopped because of invalid input. Error information has been set.
171     // The caller should return failure.
172     kError,
173     // Parsing stopped because the string is finished. The parser is wound to
174     // just past the closing quote. The caller should stop parsing the string.
175     kDone,
176     // Parsing stopped because of invalid Unicode which should be replaced with
177     // a replacement character. The parser is wound to just past the input that
178     // should be a replacement character. The caller should add a replacement
179     // character and continue parsing.
180     kReplacementCharacter,
181     // Parsing stopped because of an escape sequence. The parser is wound to
182     // just past the backslash. The caller should consume the escape sequence
183     // and continue parsing.
184     kEscape,
185   };
186 
187   // Consumes the portion of a JavaScript string which may be copied to the
188   // output with no conversions, stopping at one of the events above. Returns
189   // the reason parsing stopped and the data that was consumed. This should be
190   // called in a loop, handling all the cases above until reaching kDone.
191   std::pair<StringResult, std::string_view> ConsumeStringPart();
192 
193   // Helper function for ConsumeStringRaw() that consumes the next four or 10
194   // bytes (parser is wound to the first character of a HEX sequence, with the
195   // potential for consuming another \uXXXX for a surrogate). Returns true on
196   // success, storing the code point in |out_code_point|, and false on failure.
197   bool DecodeUTF16(base_icu::UChar32* out_code_point);
198 
199   // Assuming that the parser is wound to the start of a valid JSON number,
200   // this parses and converts it to either an int or double value.
201   std::optional<Value> ConsumeNumber();
202   // Helper that reads characters that are ints. Returns true if a number was
203   // read and false on error.
204   bool ReadInt(bool allow_leading_zeros);
205 
206   // Consumes the literal values of |true|, |false|, and |null|, assuming the
207   // parser is wound to the first character of any of those.
208   std::optional<Value> ConsumeLiteral();
209 
210   // Helper function that returns true if the byte sequence |match| can be
211   // consumed at the current parser position. Returns false if there are fewer
212   // than |match|-length bytes or if the sequence does not match, and the
213   // parser state is unchanged.
214   bool ConsumeIfMatch(std::string_view match);
215 
216   // Sets the error information to |code| at the current column, based on
217   // |index_| and |index_last_line_|, with an optional positive/negative
218   // adjustment by |column_adjust|.
219   void ReportError(JsonParseError code, int column_adjust);
220 
221   // Given the line and column number of an error, formats one of the error
222   // message constants from json_reader.h for human display.
223   static std::string FormatErrorMessage(int line, int column,
224                                         const std::string& description);
225 
226   // base::JSONParserOptions that control parsing.
227   const int options_;
228 
229   // Maximum depth to parse.
230   const size_t max_depth_;
231 
232   // The input stream being parsed. Note: Not guaranteed to be NUL-terminated.
233   std::string_view input_;
234 
235   // The index in the input stream to which the parser is wound.
236   size_t index_;
237 
238   // The number of times the parser has recursed (current stack depth).
239   size_t stack_depth_;
240 
241   // The line number that the parser is at currently.
242   int line_number_;
243 
244   // The last value of |index_| on the previous line.
245   size_t index_last_line_;
246 
247   // Error information.
248   JsonParseError error_code_;
249   int error_line_;
250   int error_column_;
251 
252   friend class JSONParserTest;
253   FRIEND_TEST_ALL_PREFIXES(JSONParserTest, NextChar);
254   FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeDictionary);
255   FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeList);
256   FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeString);
257   FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeLiterals);
258   FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeNumbers);
259   FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ErrorMessages);
260 };
261 
262 // Used when an invalid UTF-8 sequence is encountered while decoding.
263 BASE_EXPORT extern const char kUnicodeReplacementString[];
264 
265 }  // namespace internal
266 }  // namespace base
267 
268 #endif  // BASE_JSON_JSON_PARSER_H_
269