// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc.  All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

31 #ifndef GOOGLE_PROTOBUF_UTIL_CONVERTER_JSON_STREAM_PARSER_H__
32 #define GOOGLE_PROTOBUF_UTIL_CONVERTER_JSON_STREAM_PARSER_H__
33 
34 #include <stack>
35 #include <string>
36 
37 #include <google/protobuf/stubs/common.h>
38 #include <google/protobuf/stubs/strutil.h>
39 #include <google/protobuf/stubs/status.h>
40 
41 #include <google/protobuf/port_def.inc>
42 
43 namespace google {
44 namespace protobuf {
45 namespace util {
46 namespace converter {
47 
48 class ObjectWriter;
49 
50 // A JSON parser that can parse a stream of JSON chunks rather than needing the
51 // entire JSON string up front. It is a modified version of the parser in
52 // //net/proto/json/json-parser.h that has been changed in the following ways:
53 // - Changed from recursion to an explicit stack to allow resumption
54 // - Added support for int64 and uint64 numbers
55 // - Removed support for octal and decimal escapes
56 // - Removed support for numeric keys
57 // - Removed support for functions (javascript)
58 // - Removed some lax-comma support (but kept trailing comma support)
59 // - Writes directly to an ObjectWriter rather than using subclassing
60 //
61 // Here is an example usage:
62 // JsonStreamParser parser(ow_.get());
63 // util::Status result = parser.Parse(chunk1);
64 // result.Update(parser.Parse(chunk2));
65 // result.Update(parser.FinishParse());
66 // GOOGLE_DCHECK(result.ok()) << "Failed to parse JSON";
67 //
68 // This parser is thread-compatible as long as only one thread is calling a
69 // Parse() method at a time.
70 class PROTOBUF_EXPORT JsonStreamParser {
71  public:
72   // Creates a JsonStreamParser that will write to the given ObjectWriter.
73   explicit JsonStreamParser(ObjectWriter* ow);
74   virtual ~JsonStreamParser();
75 
76   // Parses a UTF-8 encoded JSON string from a StringPiece.
77   util::Status Parse(StringPiece json);
78 
79 
80   // Finish parsing the JSON string.
81   util::Status FinishParse();
82 
83 
84   // Sets the max recursion depth of JSON message to be deserialized. JSON
85   // messages over this depth will fail to be deserialized.
86   // Default value is 100.
set_max_recursion_depth(int max_depth)87   void set_max_recursion_depth(int max_depth) {
88     max_recursion_depth_ = max_depth;
89   }
90 
91  private:
92   friend class JsonStreamParserTest;
93   // Return the current recursion depth.
recursion_depth()94   int recursion_depth() { return recursion_depth_; }
95 
96   enum TokenType {
97     BEGIN_STRING,     // " or '
98     BEGIN_NUMBER,     // - or digit
99     BEGIN_TRUE,       // true
100     BEGIN_FALSE,      // false
101     BEGIN_NULL,       // null
102     BEGIN_OBJECT,     // {
103     END_OBJECT,       // }
104     BEGIN_ARRAY,      // [
105     END_ARRAY,        // ]
106     ENTRY_SEPARATOR,  // :
107     VALUE_SEPARATOR,  // ,
108     BEGIN_KEY,        // letter, _, $ or digit.  Must begin with non-digit
109     UNKNOWN           // Unknown token or we ran out of the stream.
110   };
111 
112   enum ParseType {
113     VALUE,        // Expects a {, [, true, false, null, string or number
114     OBJ_MID,      // Expects a ',' or }
115     ENTRY,        // Expects a key or }
116     ENTRY_MID,    // Expects a :
117     ARRAY_VALUE,  // Expects a value or ]
118     ARRAY_MID     // Expects a ',' or ]
119   };
120 
121   // Holds the result of parsing a number
122   struct NumberResult {
123     enum Type { DOUBLE, INT, UINT };
124     Type type;
125     union {
126       double double_val;
127       int64 int_val;
128       uint64 uint_val;
129     };
130   };
131 
132   // Parses a single chunk of JSON, returning an error if the JSON was invalid.
133   util::Status ParseChunk(StringPiece chunk);
134 
135   // Runs the parser based on stack_ and p_, until the stack is empty or p_ runs
136   // out of data. If we unexpectedly run out of p_ we push the latest back onto
137   // the stack and return.
138   util::Status RunParser();
139 
140   // Parses a value from p_ and writes it to ow_.
141   // A value may be an object, array, true, false, null, string or number.
142   util::Status ParseValue(TokenType type);
143 
144   // Parses a string and writes it out to the ow_.
145   util::Status ParseString();
146 
147   // Parses a string, storing the result in parsed_.
148   util::Status ParseStringHelper();
149 
150   // This function parses unicode escape sequences in strings. It returns an
151   // error when there's a parsing error, either the size is not the expected
152   // size or a character is not a hex digit.  When it returns str will contain
153   // what has been successfully parsed so far.
154   util::Status ParseUnicodeEscape();
155 
156   // Expects p_ to point to a JSON number, writes the number to the writer using
157   // the appropriate Render method based on the type of number.
158   util::Status ParseNumber();
159 
160   // Parse a number into a NumberResult, reporting an error if no number could
161   // be parsed. This method will try to parse into a uint64, int64, or double
162   // based on whether the number was positive or negative or had a decimal
163   // component.
164   util::Status ParseNumberHelper(NumberResult* result);
165 
166   // Parse a number as double into a NumberResult.
167   util::Status ParseDoubleHelper(const std::string& number,
168                                    NumberResult* result);
169 
170   // Handles a { during parsing of a value.
171   util::Status HandleBeginObject();
172 
173   // Parses from the ENTRY state.
174   util::Status ParseEntry(TokenType type);
175 
176   // Parses from the ENTRY_MID state.
177   util::Status ParseEntryMid(TokenType type);
178 
179   // Parses from the OBJ_MID state.
180   util::Status ParseObjectMid(TokenType type);
181 
182   // Handles a [ during parsing of a value.
183   util::Status HandleBeginArray();
184 
185   // Parses from the ARRAY_VALUE state.
186   util::Status ParseArrayValue(TokenType type);
187 
188   // Parses from the ARRAY_MID state.
189   util::Status ParseArrayMid(TokenType type);
190 
191   // Expects p_ to point to an unquoted literal
192   util::Status ParseTrue();
193   util::Status ParseFalse();
194   util::Status ParseNull();
195   util::Status ParseEmptyNull();
196 
197   // Whether an empty-null is allowed in the current state.
198   bool IsEmptyNullAllowed(TokenType type);
199 
200   // Report a failure as a util::Status.
201   util::Status ReportFailure(StringPiece message);
202 
203   // Report a failure due to an UNKNOWN token type. We check if we hit the
204   // end of the stream and if we're finishing or not to detect what type of
205   // status to return in this case.
206   util::Status ReportUnknown(StringPiece message);
207 
208   // Helper function to check recursion depth and increment it. It will return
209   // Status::OK if the current depth is allowed. Otherwise an error is returned.
210   // key is used for error reporting.
211   util::Status IncrementRecursionDepth(StringPiece key) const;
212 
213   // Advance p_ past all whitespace or until the end of the string.
214   void SkipWhitespace();
215 
216   // Advance p_ one UTF-8 character
217   void Advance();
218 
219   // Expects p_ to point to the beginning of a key.
220   util::Status ParseKey();
221 
222   // Return the type of the next token at p_.
223   TokenType GetNextTokenType();
224 
225   // The object writer to write parse events to.
226   ObjectWriter* ow_;
227 
228   // The stack of parsing we still need to do. When the stack runs empty we will
229   // have parsed a single value from the root (e.g. an object or list).
230   std::stack<ParseType> stack_;
231 
232   // Contains any leftover text from a previous chunk that we weren't able to
233   // fully parse, for example the start of a key or number.
234   std::string leftover_;
235 
236   // The current chunk of JSON being parsed. Primarily used for providing
237   // context during error reporting.
238   StringPiece json_;
239 
240   // A pointer within the current JSON being parsed, used to track location.
241   StringPiece p_;
242 
243   // Stores the last key read, as we separate parsing of keys and values.
244   StringPiece key_;
245 
246   // Storage for key_ if we need to keep ownership, for example between chunks
247   // or if the key was unescaped from a JSON string.
248   std::string key_storage_;
249 
250   // True during the FinishParse() call, so we know that any errors are fatal.
251   // For example an unterminated string will normally result in cancelling and
252   // trying during the next chunk, but during FinishParse() it is an error.
253   bool finishing_;
254 
255   // String we parsed during a call to ParseStringHelper().
256   StringPiece parsed_;
257 
258   // Storage for the string we parsed. This may be empty if the string was able
259   // to be parsed directly from the input.
260   std::string parsed_storage_;
261 
262   // The character that opened the string, either ' or ".
263   // A value of 0 indicates that string parsing is not in process.
264   char string_open_;
265 
266   // Storage for the chunk that are being parsed in ParseChunk().
267   std::string chunk_storage_;
268 
269   // Whether to allow non UTF-8 encoded input and replace invalid code points.
270   bool coerce_to_utf8_;
271 
272   // Replacement character for invalid UTF-8 code points.
273   std::string utf8_replacement_character_;
274 
275   // Whether allows empty string represented null array value or object entry
276   // value.
277   bool allow_empty_null_;
278 
279   // Whether unquoted object keys can contain embedded non-alphanumeric
280   // characters when this is unambiguous for parsing.
281   bool allow_permissive_key_naming_;
282 
283   // Whether allows out-of-range floating point numbers or reject them.
284   bool loose_float_number_conversion_;
285 
286   // Tracks current recursion depth.
287   mutable int recursion_depth_;
288 
289   // Maximum allowed recursion depth.
290   int max_recursion_depth_;
291 
292   GOOGLE_DISALLOW_IMPLICIT_CONSTRUCTORS(JsonStreamParser);
293 };
294 
295 }  // namespace converter
296 }  // namespace util
297 }  // namespace protobuf
298 }  // namespace google
299 
300 #include <google/protobuf/port_undef.inc>
301 
302 #endif  // GOOGLE_PROTOBUF_UTIL_CONVERTER_JSON_STREAM_PARSER_H__
303