• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 #include <google/protobuf/util/internal/json_stream_parser.h>
32 
33 #include <algorithm>
34 #include <cctype>
35 #include <cerrno>
36 #include <cstdlib>
37 #include <cstring>
38 #include <memory>
39 
40 #include <google/protobuf/stubs/logging.h>
41 #include <google/protobuf/stubs/common.h>
42 #include <google/protobuf/stubs/strutil.h>
43 
44 #include <google/protobuf/util/internal/object_writer.h>
45 #include <google/protobuf/util/internal/json_escaping.h>
46 #include <google/protobuf/stubs/mathlimits.h>
47 
48 
49 namespace google {
50 namespace protobuf {
51 namespace util {
52 
53 // Allow these symbols to be referenced as util::Status, util::error::* in
54 // this file.
55 using util::Status;
56 namespace error {
57 using util::error::CANCELLED;
58 using util::error::INTERNAL;
59 using util::error::INVALID_ARGUMENT;
60 }  // namespace error
61 
62 namespace converter {
63 
// Number of characters in an escaped UTF-16 code unit ('\\' 'u' X X X X).
static const int kUnicodeEscapedLength = 6;

// Value the constructor in this file stores in max_recursion_depth_.
static const int kDefaultMaxRecursionDepth = 100;

// Lengths of the true, false, and null literals. These are computed from the
// literal sizes at compile time (sizeof includes the terminating NUL, hence
// the -1) so they do not rely on a runtime strlen() call during static
// initialization.
static constexpr int true_len = static_cast<int>(sizeof("true")) - 1;
static constexpr int false_len = static_cast<int>(sizeof("false")) - 1;
static constexpr int null_len = static_cast<int>(sizeof("null")) - 1;
73 
// Returns true when |c| may start a bare (unquoted) key: an ASCII letter,
// an underscore, or a dollar sign.
inline bool IsLetter(char c) {
  if (c == '_' || c == '$') return true;
  const bool is_lower = c >= 'a' && c <= 'z';
  const bool is_upper = c >= 'A' && c <= 'Z';
  return is_lower || is_upper;
}
78 
IsAlphanumeric(char c)79 inline bool IsAlphanumeric(char c) {
80   return IsLetter(c) || ('0' <= c && c <= '9');
81 }
82 
ConsumeKey(StringPiece * input,StringPiece * key)83 static bool ConsumeKey(StringPiece* input, StringPiece* key) {
84   if (input->empty() || !IsLetter((*input)[0])) return false;
85   int len = 1;
86   for (; len < input->size(); ++len) {
87     if (!IsAlphanumeric((*input)[len])) {
88       break;
89     }
90   }
91   *key = StringPiece(input->data(), len);
92   *input = StringPiece(input->data() + len, input->size() - len);
93   return true;
94 }
95 
MatchKey(StringPiece input)96 static bool MatchKey(StringPiece input) {
97   return !input.empty() && IsLetter(input[0]);
98 }
99 
// Creates a parser that reports parse events to |ow|. All buffers start out
// empty, every option flag defaults to false, and the recursion limit is
// kDefaultMaxRecursionDepth.
JsonStreamParser::JsonStreamParser(ObjectWriter* ow)
    : ow_(ow),
      stack_(),
      leftover_(),
      json_(),
      p_(),
      key_(),
      key_storage_(),
      finishing_(false),
      parsed_(),
      parsed_storage_(),
      string_open_(0),
      chunk_storage_(),
      coerce_to_utf8_(false),
      allow_empty_null_(false),
      loose_float_number_conversion_(false),
      recursion_depth_(0),
      max_recursion_depth_(kDefaultMaxRecursionDepth) {
  // Initialize the stack with a single value to be parsed, i.e. the document
  // is expected to consist of exactly one top-level JSON value.
  stack_.push(VALUE);
}
121 
~JsonStreamParser()122 JsonStreamParser::~JsonStreamParser() {}
123 
124 
Parse(StringPiece json)125 util::Status JsonStreamParser::Parse(StringPiece json) {
126   StringPiece chunk = json;
127   // If we have leftovers from a previous chunk, append the new chunk to it
128   // and create a new StringPiece pointing at the string's data. This could
129   // be large but we rely on the chunks to be small, assuming they are
130   // fragments of a Cord.
131   if (!leftover_.empty()) {
132     // Don't point chunk to leftover_ because leftover_ will be updated in
133     // ParseChunk(chunk).
134     chunk_storage_.swap(leftover_);
135     StrAppend(&chunk_storage_, json);
136     chunk = StringPiece(chunk_storage_);
137   }
138 
139   // Find the structurally valid UTF8 prefix and parse only that.
140   int n = internal::UTF8SpnStructurallyValid(chunk);
141   if (n > 0) {
142     util::Status status = ParseChunk(chunk.substr(0, n));
143 
144     // Any leftover characters are stashed in leftover_ for later parsing when
145     // there is more data available.
146     StrAppend(&leftover_, chunk.substr(n));
147     return status;
148   } else {
149     leftover_.assign(chunk.data(), chunk.size());
150     return util::Status();
151   }
152 }
153 
FinishParse()154 util::Status JsonStreamParser::FinishParse() {
155   // If we do not expect anything and there is nothing left to parse we're all
156   // done.
157   if (stack_.empty() && leftover_.empty()) {
158     return util::Status();
159   }
160 
161   // Storage for UTF8-coerced string.
162   std::unique_ptr<char[]> utf8;
163   if (coerce_to_utf8_) {
164     utf8.reset(new char[leftover_.size()]);
165     char* coerced = internal::UTF8CoerceToStructurallyValid(leftover_, utf8.get(), ' ');
166     p_ = json_ = StringPiece(coerced, leftover_.size());
167   } else {
168     p_ = json_ = leftover_;
169     if (!internal::IsStructurallyValidUTF8(leftover_)) {
170       return ReportFailure("Encountered non UTF-8 code points.");
171     }
172   }
173 
174   // Parse the remainder in finishing mode, which reports errors for things like
175   // unterminated strings or unknown tokens that would normally be retried.
176   finishing_ = true;
177   util::Status result = RunParser();
178   if (result.ok()) {
179     SkipWhitespace();
180     if (!p_.empty()) {
181       result = ReportFailure("Parsing terminated before end of input.");
182     }
183   }
184   return result;
185 }
186 
ParseChunk(StringPiece chunk)187 util::Status JsonStreamParser::ParseChunk(StringPiece chunk) {
188   // Do not do any work if the chunk is empty.
189   if (chunk.empty()) return util::Status();
190 
191   p_ = json_ = chunk;
192 
193   finishing_ = false;
194   util::Status result = RunParser();
195   if (!result.ok()) return result;
196 
197   SkipWhitespace();
198   if (p_.empty()) {
199     // If we parsed everything we had, clear the leftover.
200     leftover_.clear();
201   } else {
202     // If we do not expect anything i.e. stack is empty, and we have non-empty
203     // string left to parse, we report an error.
204     if (stack_.empty()) {
205       return ReportFailure("Parsing terminated before end of input.");
206     }
207     // If we expect future data i.e. stack is non-empty, and we have some
208     // unparsed data left, we save it for later parse.
209     leftover_ = std::string(p_);
210   }
211   return util::Status();
212 }
213 
RunParser()214 util::Status JsonStreamParser::RunParser() {
215   while (!stack_.empty()) {
216     ParseType type = stack_.top();
217     TokenType t = (string_open_ == 0) ? GetNextTokenType() : BEGIN_STRING;
218     stack_.pop();
219     util::Status result;
220     switch (type) {
221       case VALUE:
222         result = ParseValue(t);
223         break;
224 
225       case OBJ_MID:
226         result = ParseObjectMid(t);
227         break;
228 
229       case ENTRY:
230         result = ParseEntry(t);
231         break;
232 
233       case ENTRY_MID:
234         result = ParseEntryMid(t);
235         break;
236 
237       case ARRAY_VALUE:
238         result = ParseArrayValue(t);
239         break;
240 
241       case ARRAY_MID:
242         result = ParseArrayMid(t);
243         break;
244 
245       default:
246         result = util::Status(util::error::INTERNAL,
247                               StrCat("Unknown parse type: ", type));
248         break;
249     }
250     if (!result.ok()) {
251       // If we were cancelled, save our state and try again later.
252       if (!finishing_ &&
253           result == util::Status(util::error::CANCELLED, "")) {
254         stack_.push(type);
255         // If we have a key we still need to render, make sure to save off the
256         // contents in our own storage.
257         if (!key_.empty() && key_storage_.empty()) {
258           StrAppend(&key_storage_, key_);
259           key_ = StringPiece(key_storage_);
260         }
261         result = util::Status();
262       }
263       return result;
264     }
265   }
266   return util::Status();
267 }
268 
ParseValue(TokenType type)269 util::Status JsonStreamParser::ParseValue(TokenType type) {
270   switch (type) {
271     case BEGIN_OBJECT:
272       return HandleBeginObject();
273     case BEGIN_ARRAY:
274       return HandleBeginArray();
275     case BEGIN_STRING:
276       return ParseString();
277     case BEGIN_NUMBER:
278       return ParseNumber();
279     case BEGIN_TRUE:
280       return ParseTrue();
281     case BEGIN_FALSE:
282       return ParseFalse();
283     case BEGIN_NULL:
284       return ParseNull();
285     case UNKNOWN:
286       return ReportUnknown("Expected a value.");
287     default: {
288       if (allow_empty_null_ && IsEmptyNullAllowed(type)) {
289         return ParseEmptyNull();
290       }
291 
292       // Special case for having been cut off while parsing, wait for more data.
293       // This handles things like 'fals' being at the end of the string, we
294       // don't know if the next char would be e, completing it, or something
295       // else, making it invalid.
296       if (!finishing_ && p_.length() < false_len) {
297         return util::Status(util::error::CANCELLED, "");
298       }
299       return ReportFailure("Unexpected token.");
300     }
301   }
302 }
303 
ParseString()304 util::Status JsonStreamParser::ParseString() {
305   util::Status result = ParseStringHelper();
306   if (result.ok()) {
307     ow_->RenderString(key_, parsed_);
308     key_ = StringPiece();
309     parsed_ = StringPiece();
310     parsed_storage_.clear();
311   }
312   return result;
313 }
314 
// Parses a quoted string starting at (or, when resuming, continuing from) the
// current position and leaves its unescaped contents in parsed_.
//
// The call is resumable: string_open_ holds the opening quote character while
// a string is in progress, and text seen so far is accumulated in
// parsed_storage_. When the input ends before the closing quote, CANCELLED is
// returned outside finishing mode and a failure is reported in finishing mode.
util::Status JsonStreamParser::ParseStringHelper() {
  // If we haven't seen the start quote, grab it and remember it for later.
  if (string_open_ == 0) {
    string_open_ = *p_.data();
    GOOGLE_DCHECK(string_open_ == '\"' || string_open_ == '\'');
    Advance();
  }
  // Track where we last copied data from so we can minimize copying.
  const char* last = p_.data();
  while (!p_.empty()) {
    const char* data = p_.data();
    if (*data == '\\') {
      // We're about to handle an escape, copy all bytes from last to data.
      if (last < data) {
        parsed_storage_.append(last, data - last);
      }
      // If we ran out of string after the \, cancel or report an error
      // depending on if we expect more data later.
      if (p_.length() == 1) {
        if (!finishing_) {
          return util::Status(util::error::CANCELLED, "");
        }
        return ReportFailure("Closing quote expected in string.");
      }
      // Parse a unicode escape if we found \u in the string.
      if (data[1] == 'u') {
        util::Status result = ParseUnicodeEscape();
        if (!result.ok()) {
          return result;
        }
        // Move last pointer past the unicode escape and continue.
        last = p_.data();
        continue;
      }
      // Handle the standard set of backslash-escaped characters. Any other
      // escaped character is copied through literally (this is how \" \' \\
      // and \/ are handled).
      switch (data[1]) {
        case 'b':
          parsed_storage_.push_back('\b');
          break;
        case 'f':
          parsed_storage_.push_back('\f');
          break;
        case 'n':
          parsed_storage_.push_back('\n');
          break;
        case 'r':
          parsed_storage_.push_back('\r');
          break;
        case 't':
          parsed_storage_.push_back('\t');
          break;
        case 'v':
          parsed_storage_.push_back('\v');
          break;
        default:
          parsed_storage_.push_back(data[1]);
      }
      // We handled two characters, so advance past them and continue.
      p_.remove_prefix(2);
      last = p_.data();
      continue;
    }
    // If we found the closing quote note it, advance past it, and return.
    if (*data == string_open_) {
      // If we didn't copy anything, reuse the input buffer.
      if (parsed_storage_.empty()) {
        parsed_ = StringPiece(last, data - last);
      } else {
        if (last < data) {
          parsed_storage_.append(last, data - last);
        }
        parsed_ = StringPiece(parsed_storage_);
      }
      // Clear the quote char so next time we try to parse a string we'll
      // start fresh.
      string_open_ = 0;
      Advance();
      return util::Status();
    }
    // Normal character, just advance past it.
    Advance();
  }
  // If we ran out of characters, copy over what we have so far.
  if (last < p_.data()) {
    parsed_storage_.append(last, p_.data() - last);
  }
  // If we didn't find the closing quote but we expect more data, cancel for now
  if (!finishing_) {
    return util::Status(util::error::CANCELLED, "");
  }
  // End of string reached without a closing quote, report an error.
  string_open_ = 0;
  return ReportFailure("Closing quote expected in string.");
}
409 
410 // Converts a unicode escaped character to a decimal value stored in a char32
411 // for use in UTF8 encoding utility.  We assume that str begins with \uhhhh and
412 // convert that from the hex number to a decimal value.
413 //
414 // There are some security exploits with UTF-8 that we should be careful of:
415 //   - http://www.unicode.org/reports/tr36/#UTF-8_Exploit
416 //   - http://sites/intl-eng/design-guide/core-application
ParseUnicodeEscape()417 util::Status JsonStreamParser::ParseUnicodeEscape() {
418   if (p_.length() < kUnicodeEscapedLength) {
419     if (!finishing_) {
420       return util::Status(util::error::CANCELLED, "");
421     }
422     return ReportFailure("Illegal hex string.");
423   }
424   GOOGLE_DCHECK_EQ('\\', p_.data()[0]);
425   GOOGLE_DCHECK_EQ('u', p_.data()[1]);
426   uint32 code = 0;
427   for (int i = 2; i < kUnicodeEscapedLength; ++i) {
428     if (!isxdigit(p_.data()[i])) {
429       return ReportFailure("Invalid escape sequence.");
430     }
431     code = (code << 4) + hex_digit_to_int(p_.data()[i]);
432   }
433   if (code >= JsonEscaping::kMinHighSurrogate &&
434       code <= JsonEscaping::kMaxHighSurrogate) {
435     if (p_.length() < 2 * kUnicodeEscapedLength) {
436       if (!finishing_) {
437         return util::Status(util::error::CANCELLED, "");
438       }
439       if (!coerce_to_utf8_) {
440         return ReportFailure("Missing low surrogate.");
441       }
442     } else if (p_.data()[kUnicodeEscapedLength] == '\\' &&
443                p_.data()[kUnicodeEscapedLength + 1] == 'u') {
444       uint32 low_code = 0;
445       for (int i = kUnicodeEscapedLength + 2; i < 2 * kUnicodeEscapedLength;
446            ++i) {
447         if (!isxdigit(p_.data()[i])) {
448           return ReportFailure("Invalid escape sequence.");
449         }
450         low_code = (low_code << 4) + hex_digit_to_int(p_.data()[i]);
451       }
452       if (low_code >= JsonEscaping::kMinLowSurrogate &&
453           low_code <= JsonEscaping::kMaxLowSurrogate) {
454         // Convert UTF-16 surrogate pair to 21-bit Unicode codepoint.
455         code = (((code & 0x3FF) << 10) | (low_code & 0x3FF)) +
456                JsonEscaping::kMinSupplementaryCodePoint;
457         // Advance past the first code unit escape.
458         p_.remove_prefix(kUnicodeEscapedLength);
459       } else if (!coerce_to_utf8_) {
460         return ReportFailure("Invalid low surrogate.");
461       }
462     } else if (!coerce_to_utf8_) {
463       return ReportFailure("Missing low surrogate.");
464     }
465   }
466   if (!coerce_to_utf8_ && !IsValidCodePoint(code)) {
467     return ReportFailure("Invalid unicode code point.");
468   }
469   char buf[UTFmax];
470   int len = EncodeAsUTF8Char(code, buf);
471   // Advance past the [final] code unit escape.
472   p_.remove_prefix(kUnicodeEscapedLength);
473   parsed_storage_.append(buf, len);
474   return util::Status();
475 }
476 
ParseNumber()477 util::Status JsonStreamParser::ParseNumber() {
478   NumberResult number;
479   util::Status result = ParseNumberHelper(&number);
480   if (result.ok()) {
481     switch (number.type) {
482       case NumberResult::DOUBLE:
483         ow_->RenderDouble(key_, number.double_val);
484         key_ = StringPiece();
485         break;
486 
487       case NumberResult::INT:
488         ow_->RenderInt64(key_, number.int_val);
489         key_ = StringPiece();
490         break;
491 
492       case NumberResult::UINT:
493         ow_->RenderUint64(key_, number.uint_val);
494         key_ = StringPiece();
495         break;
496 
497       default:
498         return ReportFailure("Unable to parse number.");
499     }
500   }
501   return result;
502 }
503 
ParseDoubleHelper(const std::string & number,NumberResult * result)504 util::Status JsonStreamParser::ParseDoubleHelper(const std::string& number,
505                                                  NumberResult* result) {
506   if (!safe_strtod(number, &result->double_val)) {
507     return ReportFailure("Unable to parse number.");
508   }
509   if (!loose_float_number_conversion_ &&
510       !MathLimits<double>::IsFinite(result->double_val)) {
511     return ReportFailure("Number exceeds the range of double.");
512   }
513   result->type = NumberResult::DOUBLE;
514   return util::Status();
515 }
516 
ParseNumberHelper(NumberResult * result)517 util::Status JsonStreamParser::ParseNumberHelper(NumberResult* result) {
518   const char* data = p_.data();
519   int length = p_.length();
520 
521   // Look for the first non-numeric character, or the end of the string.
522   int index = 0;
523   bool floating = false;
524   bool negative = data[index] == '-';
525   // Find the first character that cannot be part of the number. Along the way
526   // detect if the number needs to be parsed as a double.
527   // Note that this restricts numbers to the JSON specification, so for example
528   // we do not support hex or octal notations.
529   for (; index < length; ++index) {
530     char c = data[index];
531     if (isdigit(c)) continue;
532     if (c == '.' || c == 'e' || c == 'E') {
533       floating = true;
534       continue;
535     }
536     if (c == '+' || c == '-' || c == 'x') continue;
537     // Not a valid number character, break out.
538     break;
539   }
540 
541   // If the entire input is a valid number, and we may have more content in the
542   // future, we abort for now and resume when we know more.
543   if (index == length && !finishing_) {
544     return util::Status(util::error::CANCELLED, "");
545   }
546 
547   // Create a string containing just the number, so we can use safe_strtoX
548   std::string number = std::string(p_.substr(0, index));
549 
550   // Floating point number, parse as a double.
551   if (floating) {
552     util::Status status = ParseDoubleHelper(number, result);
553     if (status.ok()) {
554       p_.remove_prefix(index);
555     }
556     return status;
557   }
558 
559   // Positive non-floating point number, parse as a uint64.
560   if (!negative) {
561     // Octal/Hex numbers are not valid JSON values.
562     if (number.length() >= 2 && number[0] == '0') {
563       return ReportFailure("Octal/hex numbers are not valid JSON values.");
564     }
565     if (safe_strtou64(number, &result->uint_val)) {
566       result->type = NumberResult::UINT;
567       p_.remove_prefix(index);
568       return util::Status();
569     } else {
570       // If the value is too large, parse it as double.
571       util::Status status = ParseDoubleHelper(number, result);
572       if (status.ok()) {
573         p_.remove_prefix(index);
574       }
575       return status;
576     }
577   }
578 
579   // Octal/Hex numbers are not valid JSON values.
580   if (number.length() >= 3 && number[1] == '0') {
581     return ReportFailure("Octal/hex numbers are not valid JSON values.");
582   }
583   // Negative non-floating point number, parse as an int64.
584   if (safe_strto64(number, &result->int_val)) {
585     result->type = NumberResult::INT;
586     p_.remove_prefix(index);
587     return util::Status();
588   } else {
589     // If the value is too large, parse it as double.
590     util::Status status = ParseDoubleHelper(number, result);
591     if (status.ok()) {
592       p_.remove_prefix(index);
593     }
594     return status;
595   }
596 }
597 
HandleBeginObject()598 util::Status JsonStreamParser::HandleBeginObject() {
599   GOOGLE_DCHECK_EQ('{', *p_.data());
600   Advance();
601   ow_->StartObject(key_);
602   auto status = IncrementRecursionDepth(key_);
603   if (!status.ok()) {
604     return status;
605   }
606   key_ = StringPiece();
607   stack_.push(ENTRY);
608   return util::Status();
609 }
610 
ParseObjectMid(TokenType type)611 util::Status JsonStreamParser::ParseObjectMid(TokenType type) {
612   if (type == UNKNOWN) {
613     return ReportUnknown("Expected , or } after key:value pair.");
614   }
615 
616   // Object is complete, advance past the comma and render the EndObject.
617   if (type == END_OBJECT) {
618     Advance();
619     ow_->EndObject();
620     --recursion_depth_;
621     return util::Status();
622   }
623   // Found a comma, advance past it and get ready for an entry.
624   if (type == VALUE_SEPARATOR) {
625     Advance();
626     stack_.push(ENTRY);
627     return util::Status();
628   }
629   // Illegal token after key:value pair.
630   return ReportFailure("Expected , or } after key:value pair.");
631 }
632 
ParseEntry(TokenType type)633 util::Status JsonStreamParser::ParseEntry(TokenType type) {
634   if (type == UNKNOWN) {
635     return ReportUnknown("Expected an object key or }.");
636   }
637 
638   // Close the object and return. This allows for trailing commas.
639   if (type == END_OBJECT) {
640     ow_->EndObject();
641     Advance();
642     --recursion_depth_;
643     return util::Status();
644   }
645 
646   util::Status result;
647   if (type == BEGIN_STRING) {
648     // Key is a string (standard JSON), parse it and store the string.
649     result = ParseStringHelper();
650     if (result.ok()) {
651       key_storage_.clear();
652       if (!parsed_storage_.empty()) {
653         parsed_storage_.swap(key_storage_);
654         key_ = StringPiece(key_storage_);
655       } else {
656         key_ = parsed_;
657       }
658       parsed_ = StringPiece();
659     }
660   } else if (type == BEGIN_KEY) {
661     // Key is a bare key (back compat), create a StringPiece pointing to it.
662     result = ParseKey();
663   } else {
664     // Unknown key type, report an error.
665     result = ReportFailure("Expected an object key or }.");
666   }
667   // On success we next expect an entry mid ':' then an object mid ',' or '}'
668   if (result.ok()) {
669     stack_.push(OBJ_MID);
670     stack_.push(ENTRY_MID);
671   }
672   return result;
673 }
674 
ParseEntryMid(TokenType type)675 util::Status JsonStreamParser::ParseEntryMid(TokenType type) {
676   if (type == UNKNOWN) {
677     return ReportUnknown("Expected : between key:value pair.");
678   }
679   if (type == ENTRY_SEPARATOR) {
680     Advance();
681     stack_.push(VALUE);
682     return util::Status();
683   }
684   return ReportFailure("Expected : between key:value pair.");
685 }
686 
// Handles '[': consumes it, tells the writer that a list started under the
// pending key, clears the key, and expects an array value (or ']') next.
// Note that, unlike HandleBeginObject(), this does not touch
// recursion_depth_.
util::Status JsonStreamParser::HandleBeginArray() {
  GOOGLE_DCHECK_EQ('[', *p_.data());
  Advance();
  ow_->StartList(key_);
  key_ = StringPiece();
  stack_.push(ARRAY_VALUE);
  return util::Status();
}
695 
ParseArrayValue(TokenType type)696 util::Status JsonStreamParser::ParseArrayValue(TokenType type) {
697   if (type == UNKNOWN) {
698     return ReportUnknown("Expected a value or ] within an array.");
699   }
700 
701   if (type == END_ARRAY) {
702     ow_->EndList();
703     Advance();
704     return util::Status();
705   }
706 
707   // The ParseValue call may push something onto the stack so we need to make
708   // sure an ARRAY_MID is after it, so we push it on now. Also, the parsing of
709   // empty-null array value is relying on this ARRAY_MID token.
710   stack_.push(ARRAY_MID);
711   util::Status result = ParseValue(type);
712   if (result == util::Status(util::error::CANCELLED, "")) {
713     // If we were cancelled, pop back off the ARRAY_MID so we don't try to
714     // push it on again when we try over.
715     stack_.pop();
716   }
717   return result;
718 }
719 
ParseArrayMid(TokenType type)720 util::Status JsonStreamParser::ParseArrayMid(TokenType type) {
721   if (type == UNKNOWN) {
722     return ReportUnknown("Expected , or ] after array value.");
723   }
724 
725   if (type == END_ARRAY) {
726     ow_->EndList();
727     Advance();
728     return util::Status();
729   }
730 
731   // Found a comma, advance past it and expect an array value next.
732   if (type == VALUE_SEPARATOR) {
733     Advance();
734     stack_.push(ARRAY_VALUE);
735     return util::Status();
736   }
737   // Illegal token after array value.
738   return ReportFailure("Expected , or ] after array value.");
739 }
740 
// Renders the boolean value true under the pending key, clears the key, and
// consumes true_len characters (the "true" literal) from the input.
util::Status JsonStreamParser::ParseTrue() {
  ow_->RenderBool(key_, true);
  key_ = StringPiece();
  p_.remove_prefix(true_len);
  return util::Status();
}
747 
// Renders the boolean value false under the pending key, clears the key, and
// consumes false_len characters (the "false" literal) from the input.
util::Status JsonStreamParser::ParseFalse() {
  ow_->RenderBool(key_, false);
  key_ = StringPiece();
  p_.remove_prefix(false_len);
  return util::Status();
}
754 
// Renders a null value under the pending key, clears the key, and consumes
// null_len characters (the "null" literal) from the input.
util::Status JsonStreamParser::ParseNull() {
  ow_->RenderNull(key_);
  key_ = StringPiece();
  p_.remove_prefix(null_len);
  return util::Status();
}
761 
// Renders a null value under the pending key for a value that is missing from
// the input. Unlike ParseNull(), no input is consumed.
util::Status JsonStreamParser::ParseEmptyNull() {
  ow_->RenderNull(key_);
  key_ = StringPiece();
  return util::Status();
}
767 
IsEmptyNullAllowed(TokenType type)768 bool JsonStreamParser::IsEmptyNullAllowed(TokenType type) {
769   if (stack_.empty()) return false;
770   return (stack_.top() == ARRAY_MID && type == VALUE_SEPARATOR) ||
771          stack_.top() == OBJ_MID;
772 }
773 
ReportFailure(StringPiece message)774 util::Status JsonStreamParser::ReportFailure(StringPiece message) {
775   static const int kContextLength = 20;
776   const char* p_start = p_.data();
777   const char* json_start = json_.data();
778   const char* begin = std::max(p_start - kContextLength, json_start);
779   const char* end =
780       std::min(p_start + kContextLength, json_start + json_.size());
781   StringPiece segment(begin, end - begin);
782   std::string location(p_start - begin, ' ');
783   location.push_back('^');
784   return util::Status(util::error::INVALID_ARGUMENT,
785                       StrCat(message, "\n", segment, "\n", location));
786 }
787 
ReportUnknown(StringPiece message)788 util::Status JsonStreamParser::ReportUnknown(StringPiece message) {
789   // If we aren't finishing the parse, cancel parsing and try later.
790   if (!finishing_) {
791     return util::Status(util::error::CANCELLED, "");
792   }
793   if (p_.empty()) {
794     return ReportFailure(StrCat("Unexpected end of string. ", message));
795   }
796   return ReportFailure(message);
797 }
798 
IncrementRecursionDepth(StringPiece key) const799 util::Status JsonStreamParser::IncrementRecursionDepth(
800     StringPiece key) const {
801   if (++recursion_depth_ > max_recursion_depth_) {
802     return Status(
803         util::error::INVALID_ARGUMENT,
804         StrCat("Message too deep. Max recursion depth reached for key '",
805                      key, "'"));
806   }
807   return util::Status();
808 }
809 
SkipWhitespace()810 void JsonStreamParser::SkipWhitespace() {
811   while (!p_.empty() && ascii_isspace(*p_.data())) {
812     Advance();
813   }
814 }
815 
Advance()816 void JsonStreamParser::Advance() {
817   // Advance by moving one UTF8 character while making sure we don't go beyond
818   // the length of StringPiece.
819   p_.remove_prefix(std::min<int>(
820       p_.length(), UTF8FirstLetterNumBytes(p_.data(), p_.length())));
821 }
822 
ParseKey()823 util::Status JsonStreamParser::ParseKey() {
824   StringPiece original = p_;
825   if (!ConsumeKey(&p_, &key_)) {
826     return ReportFailure("Invalid key or variable name.");
827   }
828   // If we consumed everything but expect more data, reset p_ and cancel since
829   // we can't know if the key was complete or not.
830   if (!finishing_ && p_.empty()) {
831     p_ = original;
832     return util::Status(util::error::CANCELLED, "");
833   }
834   // Since we aren't using the key storage, clear it out.
835   key_storage_.clear();
836   return util::Status();
837 }
838 
GetNextTokenType()839 JsonStreamParser::TokenType JsonStreamParser::GetNextTokenType() {
840   SkipWhitespace();
841 
842   int size = p_.size();
843   if (size == 0) {
844     // If we ran out of data, report unknown and we'll place the previous parse
845     // type onto the stack and try again when we have more data.
846     return UNKNOWN;
847   }
848   // TODO(sven): Split this method based on context since different contexts
849   // support different tokens. Would slightly speed up processing?
850   const char* data = p_.data();
851   if (*data == '\"' || *data == '\'') return BEGIN_STRING;
852   if (*data == '-' || ('0' <= *data && *data <= '9')) {
853     return BEGIN_NUMBER;
854   }
855   if (size >= true_len && !strncmp(data, "true", true_len)) {
856     return BEGIN_TRUE;
857   }
858   if (size >= false_len && !strncmp(data, "false", false_len)) {
859     return BEGIN_FALSE;
860   }
861   if (size >= null_len && !strncmp(data, "null", null_len)) {
862     return BEGIN_NULL;
863   }
864   if (*data == '{') return BEGIN_OBJECT;
865   if (*data == '}') return END_OBJECT;
866   if (*data == '[') return BEGIN_ARRAY;
867   if (*data == ']') return END_ARRAY;
868   if (*data == ':') return ENTRY_SEPARATOR;
869   if (*data == ',') return VALUE_SEPARATOR;
870   if (MatchKey(p_)) {
871     return BEGIN_KEY;
872   }
873 
874   // We don't know that we necessarily have an invalid token here, just that we
875   // can't parse what we have so far. So we don't report an error and just
876   // return UNKNOWN so we can try again later when we have more data, or if we
877   // finish and we have leftovers.
878   return UNKNOWN;
879 }
880 
881 }  // namespace converter
882 }  // namespace util
883 }  // namespace protobuf
884 }  // namespace google
885