// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 31 #ifndef GOOGLE_PROTOBUF_UTIL_CONVERTER_JSON_STREAM_PARSER_H__ 32 #define GOOGLE_PROTOBUF_UTIL_CONVERTER_JSON_STREAM_PARSER_H__ 33 34 #include <stack> 35 #include <string> 36 37 #include <google/protobuf/stubs/common.h> 38 #include <google/protobuf/stubs/strutil.h> 39 #include <google/protobuf/stubs/status.h> 40 41 #include <google/protobuf/port_def.inc> 42 43 namespace google { 44 namespace protobuf { 45 namespace util { 46 namespace converter { 47 48 class ObjectWriter; 49 50 // A JSON parser that can parse a stream of JSON chunks rather than needing the 51 // entire JSON string up front. It is a modified version of the parser in 52 // //net/proto/json/json-parser.h that has been changed in the following ways: 53 // - Changed from recursion to an explicit stack to allow resumption 54 // - Added support for int64 and uint64 numbers 55 // - Removed support for octal and decimal escapes 56 // - Removed support for numeric keys 57 // - Removed support for functions (javascript) 58 // - Removed some lax-comma support (but kept trailing comma support) 59 // - Writes directly to an ObjectWriter rather than using subclassing 60 // 61 // Here is an example usage: 62 // JsonStreamParser parser(ow_.get()); 63 // util::Status result = parser.Parse(chunk1); 64 // result.Update(parser.Parse(chunk2)); 65 // result.Update(parser.FinishParse()); 66 // GOOGLE_DCHECK(result.ok()) << "Failed to parse JSON"; 67 // 68 // This parser is thread-compatible as long as only one thread is calling a 69 // Parse() method at a time. 70 class PROTOBUF_EXPORT JsonStreamParser { 71 public: 72 // Creates a JsonStreamParser that will write to the given ObjectWriter. 73 explicit JsonStreamParser(ObjectWriter* ow); 74 virtual ~JsonStreamParser(); 75 76 // Parses a UTF-8 encoded JSON string from a StringPiece. 77 util::Status Parse(StringPiece json); 78 79 80 // Finish parsing the JSON string. 
81 util::Status FinishParse(); 82 83 84 // Sets the max recursion depth of JSON message to be deserialized. JSON 85 // messages over this depth will fail to be deserialized. 86 // Default value is 100. set_max_recursion_depth(int max_depth)87 void set_max_recursion_depth(int max_depth) { 88 max_recursion_depth_ = max_depth; 89 } 90 91 private: 92 friend class JsonStreamParserTest; 93 // Return the current recursion depth. recursion_depth()94 int recursion_depth() { return recursion_depth_; } 95 96 enum TokenType { 97 BEGIN_STRING, // " or ' 98 BEGIN_NUMBER, // - or digit 99 BEGIN_TRUE, // true 100 BEGIN_FALSE, // false 101 BEGIN_NULL, // null 102 BEGIN_OBJECT, // { 103 END_OBJECT, // } 104 BEGIN_ARRAY, // [ 105 END_ARRAY, // ] 106 ENTRY_SEPARATOR, // : 107 VALUE_SEPARATOR, // , 108 BEGIN_KEY, // letter, _, $ or digit. Must begin with non-digit 109 UNKNOWN // Unknown token or we ran out of the stream. 110 }; 111 112 enum ParseType { 113 VALUE, // Expects a {, [, true, false, null, string or number 114 OBJ_MID, // Expects a ',' or } 115 ENTRY, // Expects a key or } 116 ENTRY_MID, // Expects a : 117 ARRAY_VALUE, // Expects a value or ] 118 ARRAY_MID // Expects a ',' or ] 119 }; 120 121 // Holds the result of parsing a number 122 struct NumberResult { 123 enum Type { DOUBLE, INT, UINT }; 124 Type type; 125 union { 126 double double_val; 127 int64 int_val; 128 uint64 uint_val; 129 }; 130 }; 131 132 // Parses a single chunk of JSON, returning an error if the JSON was invalid. 133 util::Status ParseChunk(StringPiece chunk); 134 135 // Runs the parser based on stack_ and p_, until the stack is empty or p_ runs 136 // out of data. If we unexpectedly run out of p_ we push the latest back onto 137 // the stack and return. 138 util::Status RunParser(); 139 140 // Parses a value from p_ and writes it to ow_. 141 // A value may be an object, array, true, false, null, string or number. 
142 util::Status ParseValue(TokenType type); 143 144 // Parses a string and writes it out to the ow_. 145 util::Status ParseString(); 146 147 // Parses a string, storing the result in parsed_. 148 util::Status ParseStringHelper(); 149 150 // This function parses unicode escape sequences in strings. It returns an 151 // error when there's a parsing error, either the size is not the expected 152 // size or a character is not a hex digit. When it returns str will contain 153 // what has been successfully parsed so far. 154 util::Status ParseUnicodeEscape(); 155 156 // Expects p_ to point to a JSON number, writes the number to the writer using 157 // the appropriate Render method based on the type of number. 158 util::Status ParseNumber(); 159 160 // Parse a number into a NumberResult, reporting an error if no number could 161 // be parsed. This method will try to parse into a uint64, int64, or double 162 // based on whether the number was positive or negative or had a decimal 163 // component. 164 util::Status ParseNumberHelper(NumberResult* result); 165 166 // Parse a number as double into a NumberResult. 167 util::Status ParseDoubleHelper(const std::string& number, 168 NumberResult* result); 169 170 // Handles a { during parsing of a value. 171 util::Status HandleBeginObject(); 172 173 // Parses from the ENTRY state. 174 util::Status ParseEntry(TokenType type); 175 176 // Parses from the ENTRY_MID state. 177 util::Status ParseEntryMid(TokenType type); 178 179 // Parses from the OBJ_MID state. 180 util::Status ParseObjectMid(TokenType type); 181 182 // Handles a [ during parsing of a value. 183 util::Status HandleBeginArray(); 184 185 // Parses from the ARRAY_VALUE state. 186 util::Status ParseArrayValue(TokenType type); 187 188 // Parses from the ARRAY_MID state. 
189 util::Status ParseArrayMid(TokenType type); 190 191 // Expects p_ to point to an unquoted literal 192 util::Status ParseTrue(); 193 util::Status ParseFalse(); 194 util::Status ParseNull(); 195 util::Status ParseEmptyNull(); 196 197 // Whether an empty-null is allowed in the current state. 198 bool IsEmptyNullAllowed(TokenType type); 199 200 // Report a failure as a util::Status. 201 util::Status ReportFailure(StringPiece message); 202 203 // Report a failure due to an UNKNOWN token type. We check if we hit the 204 // end of the stream and if we're finishing or not to detect what type of 205 // status to return in this case. 206 util::Status ReportUnknown(StringPiece message); 207 208 // Helper function to check recursion depth and increment it. It will return 209 // Status::OK if the current depth is allowed. Otherwise an error is returned. 210 // key is used for error reporting. 211 util::Status IncrementRecursionDepth(StringPiece key) const; 212 213 // Advance p_ past all whitespace or until the end of the string. 214 void SkipWhitespace(); 215 216 // Advance p_ one UTF-8 character 217 void Advance(); 218 219 // Expects p_ to point to the beginning of a key. 220 util::Status ParseKey(); 221 222 // Return the type of the next token at p_. 223 TokenType GetNextTokenType(); 224 225 // The object writer to write parse events to. 226 ObjectWriter* ow_; 227 228 // The stack of parsing we still need to do. When the stack runs empty we will 229 // have parsed a single value from the root (e.g. an object or list). 230 std::stack<ParseType> stack_; 231 232 // Contains any leftover text from a previous chunk that we weren't able to 233 // fully parse, for example the start of a key or number. 234 std::string leftover_; 235 236 // The current chunk of JSON being parsed. Primarily used for providing 237 // context during error reporting. 238 StringPiece json_; 239 240 // A pointer within the current JSON being parsed, used to track location. 
241 StringPiece p_; 242 243 // Stores the last key read, as we separate parsing of keys and values. 244 StringPiece key_; 245 246 // Storage for key_ if we need to keep ownership, for example between chunks 247 // or if the key was unescaped from a JSON string. 248 std::string key_storage_; 249 250 // True during the FinishParse() call, so we know that any errors are fatal. 251 // For example an unterminated string will normally result in cancelling and 252 // trying during the next chunk, but during FinishParse() it is an error. 253 bool finishing_; 254 255 // String we parsed during a call to ParseStringHelper(). 256 StringPiece parsed_; 257 258 // Storage for the string we parsed. This may be empty if the string was able 259 // to be parsed directly from the input. 260 std::string parsed_storage_; 261 262 // The character that opened the string, either ' or ". 263 // A value of 0 indicates that string parsing is not in process. 264 char string_open_; 265 266 // Storage for the chunk that are being parsed in ParseChunk(). 267 std::string chunk_storage_; 268 269 // Whether to allow non UTF-8 encoded input and replace invalid code points. 270 bool coerce_to_utf8_; 271 272 // Replacement character for invalid UTF-8 code points. 273 std::string utf8_replacement_character_; 274 275 // Whether allows empty string represented null array value or object entry 276 // value. 277 bool allow_empty_null_; 278 279 // Whether unquoted object keys can contain embedded non-alphanumeric 280 // characters when this is unambiguous for parsing. 281 bool allow_permissive_key_naming_; 282 283 // Whether allows out-of-range floating point numbers or reject them. 284 bool loose_float_number_conversion_; 285 286 // Tracks current recursion depth. 287 mutable int recursion_depth_; 288 289 // Maximum allowed recursion depth. 
290 int max_recursion_depth_; 291 292 GOOGLE_DISALLOW_IMPLICIT_CONSTRUCTORS(JsonStreamParser); 293 }; 294 295 } // namespace converter 296 } // namespace util 297 } // namespace protobuf 298 } // namespace google 299 300 #include <google/protobuf/port_undef.inc> 301 302 #endif // GOOGLE_PROTOBUF_UTIL_CONVERTER_JSON_STREAM_PARSER_H__ 303