1 // Protocol Buffers - Google's data interchange format 2 // Copyright 2008 Google Inc. All rights reserved. 3 // https://developers.google.com/protocol-buffers/ 4 // 5 // Redistribution and use in source and binary forms, with or without 6 // modification, are permitted provided that the following conditions are 7 // met: 8 // 9 // * Redistributions of source code must retain the above copyright 10 // notice, this list of conditions and the following disclaimer. 11 // * Redistributions in binary form must reproduce the above 12 // copyright notice, this list of conditions and the following disclaimer 13 // in the documentation and/or other materials provided with the 14 // distribution. 15 // * Neither the name of Google Inc. nor the names of its 16 // contributors may be used to endorse or promote products derived from 17 // this software without specific prior written permission. 18 // 19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 31 #ifndef GOOGLE_PROTOBUF_UTIL_CONVERTER_JSON_STREAM_PARSER_H__ 32 #define GOOGLE_PROTOBUF_UTIL_CONVERTER_JSON_STREAM_PARSER_H__ 33 34 #include <stack> 35 #include <string> 36 37 #include <google/protobuf/stubs/common.h> 38 #include <google/protobuf/stubs/stringpiece.h> 39 #include <google/protobuf/stubs/status.h> 40 41 namespace google { 42 namespace util { 43 class Status; 44 } // namespace util 45 46 namespace protobuf { 47 namespace util { 48 namespace converter { 49 50 class ObjectWriter; 51 52 // A JSON parser that can parse a stream of JSON chunks rather than needing the 53 // entire JSON string up front. It is a modified version of the parser in 54 // //net/proto/json/json-parser.h that has been changed in the following ways: 55 // - Changed from recursion to an explicit stack to allow resumption 56 // - Added support for int64 and uint64 numbers 57 // - Removed support for octal and decimal escapes 58 // - Removed support for numeric keys 59 // - Removed support for functions (javascript) 60 // - Removed some lax-comma support (but kept trailing comma support) 61 // - Writes directly to an ObjectWriter rather than using subclassing 62 // 63 // Here is an example usage: 64 // JsonStreamParser parser(ow_.get()); 65 // util::Status result = parser.Parse(chunk1); 66 // result.Update(parser.Parse(chunk2)); 67 // result.Update(parser.FinishParse()); 68 // GOOGLE_DCHECK(result.ok()) << "Failed to parse JSON"; 69 // 70 // This parser is thread-compatible as long as only one thread is calling a 71 // Parse() method at a time. 72 class LIBPROTOBUF_EXPORT JsonStreamParser { 73 public: 74 // Creates a JsonStreamParser that will write to the given ObjectWriter. 75 explicit JsonStreamParser(ObjectWriter* ow); 76 virtual ~JsonStreamParser(); 77 78 // Parses a UTF-8 encoded JSON string from a StringPiece. 79 util::Status Parse(StringPiece json); 80 81 82 // Finish parsing the JSON string. 83 util::Status FinishParse(); 84 85 86 private: 87 enum TokenType { 88 BEGIN_STRING, // " or ' 89 BEGIN_NUMBER, // - or digit 90 BEGIN_TRUE, // true 91 BEGIN_FALSE, // false 92 BEGIN_NULL, // null 93 BEGIN_OBJECT, // { 94 END_OBJECT, // } 95 BEGIN_ARRAY, // [ 96 END_ARRAY, // ] 97 ENTRY_SEPARATOR, // : 98 VALUE_SEPARATOR, // , 99 BEGIN_KEY, // letter, _, $ or digit. Must begin with non-digit 100 UNKNOWN // Unknown token or we ran out of the stream. 101 }; 102 103 enum ParseType { 104 VALUE, // Expects a {, [, true, false, null, string or number 105 OBJ_MID, // Expects a ',' or } 106 ENTRY, // Expects a key or } 107 ENTRY_MID, // Expects a : 108 ARRAY_VALUE, // Expects a value or ] 109 ARRAY_MID // Expects a ',' or ] 110 }; 111 112 // Holds the result of parsing a number 113 struct NumberResult { 114 enum Type { DOUBLE, INT, UINT }; 115 Type type; 116 union { 117 double double_val; 118 int64 int_val; 119 uint64 uint_val; 120 }; 121 }; 122 123 // Parses a single chunk of JSON, returning an error if the JSON was invalid. 124 util::Status ParseChunk(StringPiece json); 125 126 // Runs the parser based on stack_ and p_, until the stack is empty or p_ runs 127 // out of data. If we unexpectedly run out of p_ we push the latest back onto 128 // the stack and return. 129 util::Status RunParser(); 130 131 // Parses a value from p_ and writes it to ow_. 132 // A value may be an object, array, true, false, null, string or number. 133 util::Status ParseValue(TokenType type); 134 135 // Parses a string and writes it out to the ow_. 136 util::Status ParseString(); 137 138 // Parses a string, storing the result in parsed_. 139 util::Status ParseStringHelper(); 140 141 // This function parses unicode escape sequences in strings. It returns an 142 // error when there's a parsing error, either the size is not the expected 143 // size or a character is not a hex digit. When it returns str will contain 144 // what has been successfully parsed so far. 145 util::Status ParseUnicodeEscape(); 146 147 // Expects p_ to point to a JSON number, writes the number to the writer using 148 // the appropriate Render method based on the type of number. 149 util::Status ParseNumber(); 150 151 // Parse a number into a NumberResult, reporting an error if no number could 152 // be parsed. This method will try to parse into a uint64, int64, or double 153 // based on whether the number was positive or negative or had a decimal 154 // component. 155 util::Status ParseNumberHelper(NumberResult* result); 156 157 // Handles a { during parsing of a value. 158 util::Status HandleBeginObject(); 159 160 // Parses from the ENTRY state. 161 util::Status ParseEntry(TokenType type); 162 163 // Parses from the ENTRY_MID state. 164 util::Status ParseEntryMid(TokenType type); 165 166 // Parses from the OBJ_MID state. 167 util::Status ParseObjectMid(TokenType type); 168 169 // Handles a [ during parsing of a value. 170 util::Status HandleBeginArray(); 171 172 // Parses from the ARRAY_VALUE state. 173 util::Status ParseArrayValue(TokenType type); 174 175 // Parses from the ARRAY_MID state. 176 util::Status ParseArrayMid(TokenType type); 177 178 // Expects p_ to point to an unquoted literal 179 util::Status ParseTrue(); 180 util::Status ParseFalse(); 181 util::Status ParseNull(); 182 183 // Report a failure as a util::Status. 184 util::Status ReportFailure(StringPiece message); 185 186 // Report a failure due to an UNKNOWN token type. We check if we hit the 187 // end of the stream and if we're finishing or not to detect what type of 188 // status to return in this case. 189 util::Status ReportUnknown(StringPiece message); 190 191 // Advance p_ past all whitespace or until the end of the string. 192 void SkipWhitespace(); 193 194 // Advance p_ one UTF-8 character 195 void Advance(); 196 197 // Expects p_ to point to the beginning of a key. 198 util::Status ParseKey(); 199 200 // Return the type of the next token at p_. 201 TokenType GetNextTokenType(); 202 203 // The object writer to write parse events to. 204 ObjectWriter* ow_; 205 206 // The stack of parsing we still need to do. When the stack runs empty we will 207 // have parsed a single value from the root (e.g. an object or list). 208 std::stack<ParseType> stack_; 209 210 // Contains any leftover text from a previous chunk that we weren't able to 211 // fully parse, for example the start of a key or number. 212 string leftover_; 213 214 // The current chunk of JSON being parsed. Primarily used for providing 215 // context during error reporting. 216 StringPiece json_; 217 218 // A pointer within the current JSON being parsed, used to track location. 219 StringPiece p_; 220 221 // Stores the last key read, as we separate parsing of keys and values. 222 StringPiece key_; 223 224 // Storage for key_ if we need to keep ownership, for example between chunks 225 // or if the key was unescaped from a JSON string. 226 string key_storage_; 227 228 // True during the FinishParse() call, so we know that any errors are fatal. 229 // For example an unterminated string will normally result in cancelling and 230 // trying during the next chunk, but during FinishParse() it is an error. 231 bool finishing_; 232 233 // String we parsed during a call to ParseStringHelper(). 234 StringPiece parsed_; 235 236 // Storage for the string we parsed. This may be empty if the string was able 237 // to be parsed directly from the input. 238 string parsed_storage_; 239 240 // The character that opened the string, either ' or ". 241 // A value of 0 indicates that string parsing is not in process. 242 char string_open_; 243 244 // Storage for the chunk that are being parsed in ParseChunk(). 245 string chunk_storage_; 246 247 // Whether to allow non UTF-8 encoded input and replace invalid code points. 248 bool coerce_to_utf8_; 249 250 GOOGLE_DISALLOW_IMPLICIT_CONSTRUCTORS(JsonStreamParser); 251 }; 252 253 } // namespace converter 254 } // namespace util 255 } // namespace protobuf 256 257 } // namespace google 258 #endif // GOOGLE_PROTOBUF_UTIL_CONVERTER_JSON_STREAM_PARSER_H__ 259