• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2007-2011 Baptiste Lepilleur and The JsonCpp Authors
2 // Copyright (C) 2016 InfoTeCS JSC. All rights reserved.
3 // Distributed under MIT license, or public domain if desired and
4 // recognized in your jurisdiction.
5 // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE
6 
7 #if !defined(JSON_IS_AMALGAMATION)
8 #include "json_tool.h"
9 #include <json/assertions.h>
10 #include <json/reader.h>
11 #include <json/value.h>
12 #endif // if !defined(JSON_IS_AMALGAMATION)
13 #include <cassert>
14 #include <cstring>
15 #include <istream>
16 #include <limits>
17 #include <memory>
18 #include <set>
19 #include <sstream>
20 #include <utility>
21 
22 #include <cstdio>
23 #if __cplusplus >= 201103L
24 
25 #if !defined(sscanf)
26 #define sscanf std::sscanf
27 #endif
28 
29 #endif //__cplusplus
30 
31 #if defined(_MSC_VER)
32 #if !defined(_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES)
33 #define _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES 1
34 #endif //_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES
35 #endif //_MSC_VER
36 
37 #if defined(_MSC_VER)
38 // Disable warning about strdup being deprecated.
39 #pragma warning(disable : 4996)
40 #endif
41 
42 // Define JSONCPP_DEPRECATED_STACK_LIMIT as an appropriate integer at compile
43 // time to change the stack limit
44 #if !defined(JSONCPP_DEPRECATED_STACK_LIMIT)
45 #define JSONCPP_DEPRECATED_STACK_LIMIT 1000
46 #endif
47 
48 static size_t const stackLimit_g =
49     JSONCPP_DEPRECATED_STACK_LIMIT; // see readValue()
50 
51 namespace Json {
52 
53 #if __cplusplus >= 201103L || (defined(_CPPLIB_VER) && _CPPLIB_VER >= 520)
54 typedef std::unique_ptr<CharReader> CharReaderPtr;
55 #else
56 typedef std::auto_ptr<CharReader> CharReaderPtr;
57 #endif
58 
59 // Implementation of class Features
60 // ////////////////////////////////
61 
62 Features::Features() = default;
63 
all()64 Features Features::all() { return {}; }
65 
strictMode()66 Features Features::strictMode() {
67   Features features;
68   features.allowComments_ = false;
69   features.strictRoot_ = true;
70   features.allowDroppedNullPlaceholders_ = false;
71   features.allowNumericKeys_ = false;
72   return features;
73 }
74 
75 // Implementation of class Reader
76 // ////////////////////////////////
77 
containsNewLine(Reader::Location begin,Reader::Location end)78 bool Reader::containsNewLine(Reader::Location begin, Reader::Location end) {
79   for (; begin < end; ++begin)
80     if (*begin == '\n' || *begin == '\r')
81       return true;
82   return false;
83 }
84 
85 // Class Reader
86 // //////////////////////////////////////////////////////////////////
87 
Reader()88 Reader::Reader()
89     : errors_(), document_(), commentsBefore_(), features_(Features::all()) {}
90 
Reader(const Features & features)91 Reader::Reader(const Features& features)
92     : errors_(), document_(), begin_(), end_(), current_(), lastValueEnd_(),
93       lastValue_(), commentsBefore_(), features_(features), collectComments_() {
94 }
95 
parse(const std::string & document,Value & root,bool collectComments)96 bool Reader::parse(const std::string& document,
97                    Value& root,
98                    bool collectComments) {
99   document_.assign(document.begin(), document.end());
100   const char* begin = document_.c_str();
101   const char* end = begin + document_.length();
102   return parse(begin, end, root, collectComments);
103 }
104 
parse(std::istream & is,Value & root,bool collectComments)105 bool Reader::parse(std::istream& is, Value& root, bool collectComments) {
106   // std::istream_iterator<char> begin(is);
107   // std::istream_iterator<char> end;
108   // Those would allow streamed input from a file, if parse() were a
109   // template function.
110 
111   // Since String is reference-counted, this at least does not
112   // create an extra copy.
113   String doc;
114   std::getline(is, doc, (char)EOF);
115   return parse(doc.data(), doc.data() + doc.size(), root, collectComments);
116 }
117 
parse(const char * beginDoc,const char * endDoc,Value & root,bool collectComments)118 bool Reader::parse(const char* beginDoc,
119                    const char* endDoc,
120                    Value& root,
121                    bool collectComments) {
122   if (!features_.allowComments_) {
123     collectComments = false;
124   }
125 
126   begin_ = beginDoc;
127   end_ = endDoc;
128   collectComments_ = collectComments;
129   current_ = begin_;
130   lastValueEnd_ = nullptr;
131   lastValue_ = nullptr;
132   commentsBefore_.clear();
133   errors_.clear();
134   while (!nodes_.empty())
135     nodes_.pop();
136   nodes_.push(&root);
137 
138   bool successful = readValue();
139   Token token;
140   skipCommentTokens(token);
141   if (collectComments_ && !commentsBefore_.empty())
142     root.setComment(commentsBefore_, commentAfter);
143   if (features_.strictRoot_) {
144     if (!root.isArray() && !root.isObject()) {
145       // Set error location to start of doc, ideally should be first token found
146       // in doc
147       token.type_ = tokenError;
148       token.start_ = beginDoc;
149       token.end_ = endDoc;
150       addError(
151           "A valid JSON document must be either an array or an object value.",
152           token);
153       return false;
154     }
155   }
156   return successful;
157 }
158 
readValue()159 bool Reader::readValue() {
160   // readValue() may call itself only if it calls readObject() or ReadArray().
161   // These methods execute nodes_.push() just before and nodes_.pop)() just
162   // after calling readValue(). parse() executes one nodes_.push(), so > instead
163   // of >=.
164   if (nodes_.size() > stackLimit_g)
165     throwRuntimeError("Exceeded stackLimit in readValue().");
166 
167   Token token;
168   skipCommentTokens(token);
169   bool successful = true;
170 
171   if (collectComments_ && !commentsBefore_.empty()) {
172     currentValue().setComment(commentsBefore_, commentBefore);
173     commentsBefore_.clear();
174   }
175 
176   switch (token.type_) {
177   case tokenObjectBegin:
178     successful = readObject(token);
179     currentValue().setOffsetLimit(current_ - begin_);
180     break;
181   case tokenArrayBegin:
182     successful = readArray(token);
183     currentValue().setOffsetLimit(current_ - begin_);
184     break;
185   case tokenNumber:
186     successful = decodeNumber(token);
187     break;
188   case tokenString:
189     successful = decodeString(token);
190     break;
191   case tokenTrue: {
192     Value v(true);
193     currentValue().swapPayload(v);
194     currentValue().setOffsetStart(token.start_ - begin_);
195     currentValue().setOffsetLimit(token.end_ - begin_);
196   } break;
197   case tokenFalse: {
198     Value v(false);
199     currentValue().swapPayload(v);
200     currentValue().setOffsetStart(token.start_ - begin_);
201     currentValue().setOffsetLimit(token.end_ - begin_);
202   } break;
203   case tokenNull: {
204     Value v;
205     currentValue().swapPayload(v);
206     currentValue().setOffsetStart(token.start_ - begin_);
207     currentValue().setOffsetLimit(token.end_ - begin_);
208   } break;
209   case tokenArraySeparator:
210   case tokenObjectEnd:
211   case tokenArrayEnd:
212     if (features_.allowDroppedNullPlaceholders_) {
213       // "Un-read" the current token and mark the current value as a null
214       // token.
215       current_--;
216       Value v;
217       currentValue().swapPayload(v);
218       currentValue().setOffsetStart(current_ - begin_ - 1);
219       currentValue().setOffsetLimit(current_ - begin_);
220       break;
221     } // Else, fall through...
222   default:
223     currentValue().setOffsetStart(token.start_ - begin_);
224     currentValue().setOffsetLimit(token.end_ - begin_);
225     return addError("Syntax error: value, object or array expected.", token);
226   }
227 
228   if (collectComments_) {
229     lastValueEnd_ = current_;
230     lastValue_ = &currentValue();
231   }
232 
233   return successful;
234 }
235 
skipCommentTokens(Token & token)236 void Reader::skipCommentTokens(Token& token) {
237   if (features_.allowComments_) {
238     do {
239       readToken(token);
240     } while (token.type_ == tokenComment);
241   } else {
242     readToken(token);
243   }
244 }
245 
readToken(Token & token)246 bool Reader::readToken(Token& token) {
247   skipSpaces();
248   token.start_ = current_;
249   Char c = getNextChar();
250   bool ok = true;
251   switch (c) {
252   case '{':
253     token.type_ = tokenObjectBegin;
254     break;
255   case '}':
256     token.type_ = tokenObjectEnd;
257     break;
258   case '[':
259     token.type_ = tokenArrayBegin;
260     break;
261   case ']':
262     token.type_ = tokenArrayEnd;
263     break;
264   case '"':
265     token.type_ = tokenString;
266     ok = readString();
267     break;
268   case '/':
269     token.type_ = tokenComment;
270     ok = readComment();
271     break;
272   case '0':
273   case '1':
274   case '2':
275   case '3':
276   case '4':
277   case '5':
278   case '6':
279   case '7':
280   case '8':
281   case '9':
282   case '-':
283     token.type_ = tokenNumber;
284     readNumber();
285     break;
286   case 't':
287     token.type_ = tokenTrue;
288     ok = match("rue", 3);
289     break;
290   case 'f':
291     token.type_ = tokenFalse;
292     ok = match("alse", 4);
293     break;
294   case 'n':
295     token.type_ = tokenNull;
296     ok = match("ull", 3);
297     break;
298   case ',':
299     token.type_ = tokenArraySeparator;
300     break;
301   case ':':
302     token.type_ = tokenMemberSeparator;
303     break;
304   case 0:
305     token.type_ = tokenEndOfStream;
306     break;
307   default:
308     ok = false;
309     break;
310   }
311   if (!ok)
312     token.type_ = tokenError;
313   token.end_ = current_;
314   return true;
315 }
316 
skipSpaces()317 void Reader::skipSpaces() {
318   while (current_ != end_) {
319     Char c = *current_;
320     if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
321       ++current_;
322     else
323       break;
324   }
325 }
326 
match(Location pattern,int patternLength)327 bool Reader::match(Location pattern, int patternLength) {
328   if (end_ - current_ < patternLength)
329     return false;
330   int index = patternLength;
331   while (index--)
332     if (current_[index] != pattern[index])
333       return false;
334   current_ += patternLength;
335   return true;
336 }
337 
readComment()338 bool Reader::readComment() {
339   Location commentBegin = current_ - 1;
340   Char c = getNextChar();
341   bool successful = false;
342   if (c == '*')
343     successful = readCStyleComment();
344   else if (c == '/')
345     successful = readCppStyleComment();
346   if (!successful)
347     return false;
348 
349   if (collectComments_) {
350     CommentPlacement placement = commentBefore;
351     if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
352       if (c != '*' || !containsNewLine(commentBegin, current_))
353         placement = commentAfterOnSameLine;
354     }
355 
356     addComment(commentBegin, current_, placement);
357   }
358   return true;
359 }
360 
normalizeEOL(Reader::Location begin,Reader::Location end)361 String Reader::normalizeEOL(Reader::Location begin, Reader::Location end) {
362   String normalized;
363   normalized.reserve(static_cast<size_t>(end - begin));
364   Reader::Location current = begin;
365   while (current != end) {
366     char c = *current++;
367     if (c == '\r') {
368       if (current != end && *current == '\n')
369         // convert dos EOL
370         ++current;
371       // convert Mac EOL
372       normalized += '\n';
373     } else {
374       normalized += c;
375     }
376   }
377   return normalized;
378 }
379 
addComment(Location begin,Location end,CommentPlacement placement)380 void Reader::addComment(Location begin,
381                         Location end,
382                         CommentPlacement placement) {
383   assert(collectComments_);
384   const String& normalized = normalizeEOL(begin, end);
385   if (placement == commentAfterOnSameLine) {
386     assert(lastValue_ != nullptr);
387     lastValue_->setComment(normalized, placement);
388   } else {
389     commentsBefore_ += normalized;
390   }
391 }
392 
readCStyleComment()393 bool Reader::readCStyleComment() {
394   while ((current_ + 1) < end_) {
395     Char c = getNextChar();
396     if (c == '*' && *current_ == '/')
397       break;
398   }
399   return getNextChar() == '/';
400 }
401 
readCppStyleComment()402 bool Reader::readCppStyleComment() {
403   while (current_ != end_) {
404     Char c = getNextChar();
405     if (c == '\n')
406       break;
407     if (c == '\r') {
408       // Consume DOS EOL. It will be normalized in addComment.
409       if (current_ != end_ && *current_ == '\n')
410         getNextChar();
411       // Break on Moc OS 9 EOL.
412       break;
413     }
414   }
415   return true;
416 }
417 
readNumber()418 void Reader::readNumber() {
419   const char* p = current_;
420   char c = '0'; // stopgap for already consumed character
421   // integral part
422   while (c >= '0' && c <= '9')
423     c = (current_ = p) < end_ ? *p++ : '\0';
424   // fractional part
425   if (c == '.') {
426     c = (current_ = p) < end_ ? *p++ : '\0';
427     while (c >= '0' && c <= '9')
428       c = (current_ = p) < end_ ? *p++ : '\0';
429   }
430   // exponential part
431   if (c == 'e' || c == 'E') {
432     c = (current_ = p) < end_ ? *p++ : '\0';
433     if (c == '+' || c == '-')
434       c = (current_ = p) < end_ ? *p++ : '\0';
435     while (c >= '0' && c <= '9')
436       c = (current_ = p) < end_ ? *p++ : '\0';
437   }
438 }
439 
readString()440 bool Reader::readString() {
441   Char c = '\0';
442   while (current_ != end_) {
443     c = getNextChar();
444     if (c == '\\')
445       getNextChar();
446     else if (c == '"')
447       break;
448   }
449   return c == '"';
450 }
451 
readObject(Token & token)452 bool Reader::readObject(Token& token) {
453   Token tokenName;
454   String name;
455   Value init(objectValue);
456   currentValue().swapPayload(init);
457   currentValue().setOffsetStart(token.start_ - begin_);
458   while (readToken(tokenName)) {
459     bool initialTokenOk = true;
460     while (tokenName.type_ == tokenComment && initialTokenOk)
461       initialTokenOk = readToken(tokenName);
462     if (!initialTokenOk)
463       break;
464     if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object
465       return true;
466     name.clear();
467     if (tokenName.type_ == tokenString) {
468       if (!decodeString(tokenName, name))
469         return recoverFromError(tokenObjectEnd);
470     } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
471       Value numberName;
472       if (!decodeNumber(tokenName, numberName))
473         return recoverFromError(tokenObjectEnd);
474       name = String(numberName.asCString());
475     } else {
476       break;
477     }
478 
479     Token colon;
480     if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
481       return addErrorAndRecover("Missing ':' after object member name", colon,
482                                 tokenObjectEnd);
483     }
484     Value& value = currentValue()[name];
485     nodes_.push(&value);
486     bool ok = readValue();
487     nodes_.pop();
488     if (!ok) // error already set
489       return recoverFromError(tokenObjectEnd);
490 
491     Token comma;
492     if (!readToken(comma) ||
493         (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
494          comma.type_ != tokenComment)) {
495       return addErrorAndRecover("Missing ',' or '}' in object declaration",
496                                 comma, tokenObjectEnd);
497     }
498     bool finalizeTokenOk = true;
499     while (comma.type_ == tokenComment && finalizeTokenOk)
500       finalizeTokenOk = readToken(comma);
501     if (comma.type_ == tokenObjectEnd)
502       return true;
503   }
504   return addErrorAndRecover("Missing '}' or object member name", tokenName,
505                             tokenObjectEnd);
506 }
507 
readArray(Token & token)508 bool Reader::readArray(Token& token) {
509   Value init(arrayValue);
510   currentValue().swapPayload(init);
511   currentValue().setOffsetStart(token.start_ - begin_);
512   skipSpaces();
513   if (current_ != end_ && *current_ == ']') // empty array
514   {
515     Token endArray;
516     readToken(endArray);
517     return true;
518   }
519   int index = 0;
520   for (;;) {
521     Value& value = currentValue()[index++];
522     nodes_.push(&value);
523     bool ok = readValue();
524     nodes_.pop();
525     if (!ok) // error already set
526       return recoverFromError(tokenArrayEnd);
527 
528     Token currentToken;
529     // Accept Comment after last item in the array.
530     ok = readToken(currentToken);
531     while (currentToken.type_ == tokenComment && ok) {
532       ok = readToken(currentToken);
533     }
534     bool badTokenType = (currentToken.type_ != tokenArraySeparator &&
535                          currentToken.type_ != tokenArrayEnd);
536     if (!ok || badTokenType) {
537       return addErrorAndRecover("Missing ',' or ']' in array declaration",
538                                 currentToken, tokenArrayEnd);
539     }
540     if (currentToken.type_ == tokenArrayEnd)
541       break;
542   }
543   return true;
544 }
545 
decodeNumber(Token & token)546 bool Reader::decodeNumber(Token& token) {
547   Value decoded;
548   if (!decodeNumber(token, decoded))
549     return false;
550   currentValue().swapPayload(decoded);
551   currentValue().setOffsetStart(token.start_ - begin_);
552   currentValue().setOffsetLimit(token.end_ - begin_);
553   return true;
554 }
555 
decodeNumber(Token & token,Value & decoded)556 bool Reader::decodeNumber(Token& token, Value& decoded) {
557   // Attempts to parse the number as an integer. If the number is
558   // larger than the maximum supported value of an integer then
559   // we decode the number as a double.
560   Location current = token.start_;
561   bool isNegative = *current == '-';
562   if (isNegative)
563     ++current;
564   // TODO: Help the compiler do the div and mod at compile time or get rid of
565   // them.
566   Value::LargestUInt maxIntegerValue =
567       isNegative ? Value::LargestUInt(Value::maxLargestInt) + 1
568                  : Value::maxLargestUInt;
569   Value::LargestUInt threshold = maxIntegerValue / 10;
570   Value::LargestUInt value = 0;
571   while (current < token.end_) {
572     Char c = *current++;
573     if (c < '0' || c > '9')
574       return decodeDouble(token, decoded);
575     auto digit(static_cast<Value::UInt>(c - '0'));
576     if (value >= threshold) {
577       // We've hit or exceeded the max value divided by 10 (rounded down). If
578       // a) we've only just touched the limit, b) this is the last digit, and
579       // c) it's small enough to fit in that rounding delta, we're okay.
580       // Otherwise treat this number as a double to avoid overflow.
581       if (value > threshold || current != token.end_ ||
582           digit > maxIntegerValue % 10) {
583         return decodeDouble(token, decoded);
584       }
585     }
586     value = value * 10 + digit;
587   }
588   if (isNegative && value == maxIntegerValue)
589     decoded = Value::minLargestInt;
590   else if (isNegative)
591     decoded = -Value::LargestInt(value);
592   else if (value <= Value::LargestUInt(Value::maxInt))
593     decoded = Value::LargestInt(value);
594   else
595     decoded = value;
596   return true;
597 }
598 
decodeDouble(Token & token)599 bool Reader::decodeDouble(Token& token) {
600   Value decoded;
601   if (!decodeDouble(token, decoded))
602     return false;
603   currentValue().swapPayload(decoded);
604   currentValue().setOffsetStart(token.start_ - begin_);
605   currentValue().setOffsetLimit(token.end_ - begin_);
606   return true;
607 }
608 
decodeDouble(Token & token,Value & decoded)609 bool Reader::decodeDouble(Token& token, Value& decoded) {
610   double value = 0;
611   String buffer(token.start_, token.end_);
612   IStringStream is(buffer);
613   if (!(is >> value))
614     return addError(
615         "'" + String(token.start_, token.end_) + "' is not a number.", token);
616   decoded = value;
617   return true;
618 }
619 
decodeString(Token & token)620 bool Reader::decodeString(Token& token) {
621   String decoded_string;
622   if (!decodeString(token, decoded_string))
623     return false;
624   Value decoded(decoded_string);
625   currentValue().swapPayload(decoded);
626   currentValue().setOffsetStart(token.start_ - begin_);
627   currentValue().setOffsetLimit(token.end_ - begin_);
628   return true;
629 }
630 
decodeString(Token & token,String & decoded)631 bool Reader::decodeString(Token& token, String& decoded) {
632   decoded.reserve(static_cast<size_t>(token.end_ - token.start_ - 2));
633   Location current = token.start_ + 1; // skip '"'
634   Location end = token.end_ - 1;       // do not include '"'
635   while (current != end) {
636     Char c = *current++;
637     if (c == '"')
638       break;
639     else if (c == '\\') {
640       if (current == end)
641         return addError("Empty escape sequence in string", token, current);
642       Char escape = *current++;
643       switch (escape) {
644       case '"':
645         decoded += '"';
646         break;
647       case '/':
648         decoded += '/';
649         break;
650       case '\\':
651         decoded += '\\';
652         break;
653       case 'b':
654         decoded += '\b';
655         break;
656       case 'f':
657         decoded += '\f';
658         break;
659       case 'n':
660         decoded += '\n';
661         break;
662       case 'r':
663         decoded += '\r';
664         break;
665       case 't':
666         decoded += '\t';
667         break;
668       case 'u': {
669         unsigned int unicode;
670         if (!decodeUnicodeCodePoint(token, current, end, unicode))
671           return false;
672         decoded += codePointToUTF8(unicode);
673       } break;
674       default:
675         return addError("Bad escape sequence in string", token, current);
676       }
677     } else {
678       decoded += c;
679     }
680   }
681   return true;
682 }
683 
decodeUnicodeCodePoint(Token & token,Location & current,Location end,unsigned int & unicode)684 bool Reader::decodeUnicodeCodePoint(Token& token,
685                                     Location& current,
686                                     Location end,
687                                     unsigned int& unicode) {
688 
689   if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
690     return false;
691   if (unicode >= 0xD800 && unicode <= 0xDBFF) {
692     // surrogate pairs
693     if (end - current < 6)
694       return addError(
695           "additional six characters expected to parse unicode surrogate pair.",
696           token, current);
697     if (*(current++) == '\\' && *(current++) == 'u') {
698       unsigned int surrogatePair;
699       if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
700         unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
701       } else
702         return false;
703     } else
704       return addError("expecting another \\u token to begin the second half of "
705                       "a unicode surrogate pair",
706                       token, current);
707   }
708   return true;
709 }
710 
decodeUnicodeEscapeSequence(Token & token,Location & current,Location end,unsigned int & ret_unicode)711 bool Reader::decodeUnicodeEscapeSequence(Token& token,
712                                          Location& current,
713                                          Location end,
714                                          unsigned int& ret_unicode) {
715   if (end - current < 4)
716     return addError(
717         "Bad unicode escape sequence in string: four digits expected.", token,
718         current);
719   int unicode = 0;
720   for (int index = 0; index < 4; ++index) {
721     Char c = *current++;
722     unicode *= 16;
723     if (c >= '0' && c <= '9')
724       unicode += c - '0';
725     else if (c >= 'a' && c <= 'f')
726       unicode += c - 'a' + 10;
727     else if (c >= 'A' && c <= 'F')
728       unicode += c - 'A' + 10;
729     else
730       return addError(
731           "Bad unicode escape sequence in string: hexadecimal digit expected.",
732           token, current);
733   }
734   ret_unicode = static_cast<unsigned int>(unicode);
735   return true;
736 }
737 
addError(const String & message,Token & token,Location extra)738 bool Reader::addError(const String& message, Token& token, Location extra) {
739   ErrorInfo info;
740   info.token_ = token;
741   info.message_ = message;
742   info.extra_ = extra;
743   errors_.push_back(info);
744   return false;
745 }
746 
recoverFromError(TokenType skipUntilToken)747 bool Reader::recoverFromError(TokenType skipUntilToken) {
748   size_t const errorCount = errors_.size();
749   Token skip;
750   for (;;) {
751     if (!readToken(skip))
752       errors_.resize(errorCount); // discard errors caused by recovery
753     if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
754       break;
755   }
756   errors_.resize(errorCount);
757   return false;
758 }
759 
addErrorAndRecover(const String & message,Token & token,TokenType skipUntilToken)760 bool Reader::addErrorAndRecover(const String& message,
761                                 Token& token,
762                                 TokenType skipUntilToken) {
763   addError(message, token);
764   return recoverFromError(skipUntilToken);
765 }
766 
currentValue()767 Value& Reader::currentValue() { return *(nodes_.top()); }
768 
getNextChar()769 Reader::Char Reader::getNextChar() {
770   if (current_ == end_)
771     return 0;
772   return *current_++;
773 }
774 
getLocationLineAndColumn(Location location,int & line,int & column) const775 void Reader::getLocationLineAndColumn(Location location,
776                                       int& line,
777                                       int& column) const {
778   Location current = begin_;
779   Location lastLineStart = current;
780   line = 0;
781   while (current < location && current != end_) {
782     Char c = *current++;
783     if (c == '\r') {
784       if (*current == '\n')
785         ++current;
786       lastLineStart = current;
787       ++line;
788     } else if (c == '\n') {
789       lastLineStart = current;
790       ++line;
791     }
792   }
793   // column & line start at 1
794   column = int(location - lastLineStart) + 1;
795   ++line;
796 }
797 
getLocationLineAndColumn(Location location) const798 String Reader::getLocationLineAndColumn(Location location) const {
799   int line, column;
800   getLocationLineAndColumn(location, line, column);
801   char buffer[18 + 16 + 16 + 1];
802   jsoncpp_snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
803   return buffer;
804 }
805 
806 // Deprecated. Preserved for backward compatibility
getFormatedErrorMessages() const807 String Reader::getFormatedErrorMessages() const {
808   return getFormattedErrorMessages();
809 }
810 
getFormattedErrorMessages() const811 String Reader::getFormattedErrorMessages() const {
812   String formattedMessage;
813   for (const auto& error : errors_) {
814     formattedMessage +=
815         "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
816     formattedMessage += "  " + error.message_ + "\n";
817     if (error.extra_)
818       formattedMessage +=
819           "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
820   }
821   return formattedMessage;
822 }
823 
getStructuredErrors() const824 std::vector<Reader::StructuredError> Reader::getStructuredErrors() const {
825   std::vector<Reader::StructuredError> allErrors;
826   for (const auto& error : errors_) {
827     Reader::StructuredError structured;
828     structured.offset_start = error.token_.start_ - begin_;
829     structured.offset_limit = error.token_.end_ - begin_;
830     structured.message = error.message_;
831     allErrors.push_back(structured);
832   }
833   return allErrors;
834 }
835 
pushError(const Value & value,const String & message)836 bool Reader::pushError(const Value& value, const String& message) {
837   ptrdiff_t const length = end_ - begin_;
838   if (value.getOffsetStart() > length || value.getOffsetLimit() > length)
839     return false;
840   Token token;
841   token.type_ = tokenError;
842   token.start_ = begin_ + value.getOffsetStart();
843   token.end_ = begin_ + value.getOffsetLimit();
844   ErrorInfo info;
845   info.token_ = token;
846   info.message_ = message;
847   info.extra_ = nullptr;
848   errors_.push_back(info);
849   return true;
850 }
851 
pushError(const Value & value,const String & message,const Value & extra)852 bool Reader::pushError(const Value& value,
853                        const String& message,
854                        const Value& extra) {
855   ptrdiff_t const length = end_ - begin_;
856   if (value.getOffsetStart() > length || value.getOffsetLimit() > length ||
857       extra.getOffsetLimit() > length)
858     return false;
859   Token token;
860   token.type_ = tokenError;
861   token.start_ = begin_ + value.getOffsetStart();
862   token.end_ = begin_ + value.getOffsetLimit();
863   ErrorInfo info;
864   info.token_ = token;
865   info.message_ = message;
866   info.extra_ = begin_ + extra.getOffsetStart();
867   errors_.push_back(info);
868   return true;
869 }
870 
good() const871 bool Reader::good() const { return errors_.empty(); }
872 
// Originally copied from the Features class (now deprecated), used internally
// for features implementation.
//
// Every member has an explicit zero default so that a default-constructed
// OurFeatures never carries indeterminate flags; this matches the
// value-initialised object returned by all().
class OurFeatures {
public:
  // Returns a value-initialised feature set (see the definition below).
  static OurFeatures all();
  bool allowComments_ = false;
  bool strictRoot_ = false;
  bool allowDroppedNullPlaceholders_ = false;
  bool allowNumericKeys_ = false;
  bool allowSingleQuotes_ = false;
  bool failIfExtra_ = false;
  bool rejectDupKeys_ = false;
  bool allowSpecialFloats_ = false;
  size_t stackLimit_ = 0;
}; // OurFeatures
888 
all()889 OurFeatures OurFeatures::all() { return {}; }
890 
// Declaration of class OurReader
// ////////////////////////////////
893 
894 // Originally copied from the Reader class (now deprecated), used internally
895 // for implementing JSON reading.
896 class OurReader {
897 public:
898   typedef char Char;
899   typedef const Char* Location;
900   struct StructuredError {
901     ptrdiff_t offset_start;
902     ptrdiff_t offset_limit;
903     String message;
904   };
905 
906   OurReader(OurFeatures const& features);
907   bool parse(const char* beginDoc,
908              const char* endDoc,
909              Value& root,
910              bool collectComments = true);
911   String getFormattedErrorMessages() const;
912   std::vector<StructuredError> getStructuredErrors() const;
913   bool pushError(const Value& value, const String& message);
914   bool pushError(const Value& value, const String& message, const Value& extra);
915   bool good() const;
916 
917 private:
918   OurReader(OurReader const&);      // no impl
919   void operator=(OurReader const&); // no impl
920 
921   enum TokenType {
922     tokenEndOfStream = 0,
923     tokenObjectBegin,
924     tokenObjectEnd,
925     tokenArrayBegin,
926     tokenArrayEnd,
927     tokenString,
928     tokenNumber,
929     tokenTrue,
930     tokenFalse,
931     tokenNull,
932     tokenNaN,
933     tokenPosInf,
934     tokenNegInf,
935     tokenArraySeparator,
936     tokenMemberSeparator,
937     tokenComment,
938     tokenError
939   };
940 
941   class Token {
942   public:
943     TokenType type_;
944     Location start_;
945     Location end_;
946   };
947 
948   class ErrorInfo {
949   public:
950     Token token_;
951     String message_;
952     Location extra_;
953   };
954 
955   typedef std::deque<ErrorInfo> Errors;
956 
957   bool readToken(Token& token);
958   void skipSpaces();
959   bool match(Location pattern, int patternLength);
960   bool readComment();
961   bool readCStyleComment();
962   bool readCppStyleComment();
963   bool readString();
964   bool readStringSingleQuote();
965   bool readNumber(bool checkInf);
966   bool readValue();
967   bool readObject(Token& token);
968   bool readArray(Token& token);
969   bool decodeNumber(Token& token);
970   bool decodeNumber(Token& token, Value& decoded);
971   bool decodeString(Token& token);
972   bool decodeString(Token& token, String& decoded);
973   bool decodeDouble(Token& token);
974   bool decodeDouble(Token& token, Value& decoded);
975   bool decodeUnicodeCodePoint(Token& token,
976                               Location& current,
977                               Location end,
978                               unsigned int& unicode);
979   bool decodeUnicodeEscapeSequence(Token& token,
980                                    Location& current,
981                                    Location end,
982                                    unsigned int& unicode);
983   bool addError(const String& message, Token& token, Location extra = nullptr);
984   bool recoverFromError(TokenType skipUntilToken);
985   bool addErrorAndRecover(const String& message,
986                           Token& token,
987                           TokenType skipUntilToken);
988   void skipUntilSpace();
989   Value& currentValue();
990   Char getNextChar();
991   void
992   getLocationLineAndColumn(Location location, int& line, int& column) const;
993   String getLocationLineAndColumn(Location location) const;
994   void addComment(Location begin, Location end, CommentPlacement placement);
995   void skipCommentTokens(Token& token);
996 
997   static String normalizeEOL(Location begin, Location end);
998   static bool containsNewLine(Location begin, Location end);
999 
1000   typedef std::stack<Value*> Nodes;
1001   Nodes nodes_;
1002   Errors errors_;
1003   String document_;
1004   Location begin_;
1005   Location end_;
1006   Location current_;
1007   Location lastValueEnd_;
1008   Value* lastValue_;
1009   String commentsBefore_;
1010 
1011   OurFeatures const features_;
1012   bool collectComments_;
1013 }; // OurReader
1014 
// Complete copy of the Reader implementation, adapted for OurReader.
1016 
containsNewLine(OurReader::Location begin,OurReader::Location end)1017 bool OurReader::containsNewLine(OurReader::Location begin,
1018                                 OurReader::Location end) {
1019   for (; begin < end; ++begin)
1020     if (*begin == '\n' || *begin == '\r')
1021       return true;
1022   return false;
1023 }
1024 
OurReader(OurFeatures const & features)1025 OurReader::OurReader(OurFeatures const& features)
1026     : errors_(), document_(), begin_(), end_(), current_(), lastValueEnd_(),
1027       lastValue_(), commentsBefore_(), features_(features), collectComments_() {
1028 }
1029 
parse(const char * beginDoc,const char * endDoc,Value & root,bool collectComments)1030 bool OurReader::parse(const char* beginDoc,
1031                       const char* endDoc,
1032                       Value& root,
1033                       bool collectComments) {
1034   if (!features_.allowComments_) {
1035     collectComments = false;
1036   }
1037 
1038   begin_ = beginDoc;
1039   end_ = endDoc;
1040   collectComments_ = collectComments;
1041   current_ = begin_;
1042   lastValueEnd_ = nullptr;
1043   lastValue_ = nullptr;
1044   commentsBefore_.clear();
1045   errors_.clear();
1046   while (!nodes_.empty())
1047     nodes_.pop();
1048   nodes_.push(&root);
1049 
1050   bool successful = readValue();
1051   nodes_.pop();
1052   Token token;
1053   skipCommentTokens(token);
1054   if (features_.failIfExtra_) {
1055     if ((features_.strictRoot_ || token.type_ != tokenError) &&
1056         token.type_ != tokenEndOfStream) {
1057       addError("Extra non-whitespace after JSON value.", token);
1058       return false;
1059     }
1060   }
1061   if (collectComments_ && !commentsBefore_.empty())
1062     root.setComment(commentsBefore_, commentAfter);
1063   if (features_.strictRoot_) {
1064     if (!root.isArray() && !root.isObject()) {
1065       // Set error location to start of doc, ideally should be first token found
1066       // in doc
1067       token.type_ = tokenError;
1068       token.start_ = beginDoc;
1069       token.end_ = endDoc;
1070       addError(
1071           "A valid JSON document must be either an array or an object value.",
1072           token);
1073       return false;
1074     }
1075   }
1076   return successful;
1077 }
1078 
readValue()1079 bool OurReader::readValue() {
1080   //  To preserve the old behaviour we cast size_t to int.
1081   if (nodes_.size() > features_.stackLimit_)
1082     throwRuntimeError("Exceeded stackLimit in readValue().");
1083   Token token;
1084   skipCommentTokens(token);
1085   bool successful = true;
1086 
1087   if (collectComments_ && !commentsBefore_.empty()) {
1088     currentValue().setComment(commentsBefore_, commentBefore);
1089     commentsBefore_.clear();
1090   }
1091 
1092   switch (token.type_) {
1093   case tokenObjectBegin:
1094     successful = readObject(token);
1095     currentValue().setOffsetLimit(current_ - begin_);
1096     break;
1097   case tokenArrayBegin:
1098     successful = readArray(token);
1099     currentValue().setOffsetLimit(current_ - begin_);
1100     break;
1101   case tokenNumber:
1102     successful = decodeNumber(token);
1103     break;
1104   case tokenString:
1105     successful = decodeString(token);
1106     break;
1107   case tokenTrue: {
1108     Value v(true);
1109     currentValue().swapPayload(v);
1110     currentValue().setOffsetStart(token.start_ - begin_);
1111     currentValue().setOffsetLimit(token.end_ - begin_);
1112   } break;
1113   case tokenFalse: {
1114     Value v(false);
1115     currentValue().swapPayload(v);
1116     currentValue().setOffsetStart(token.start_ - begin_);
1117     currentValue().setOffsetLimit(token.end_ - begin_);
1118   } break;
1119   case tokenNull: {
1120     Value v;
1121     currentValue().swapPayload(v);
1122     currentValue().setOffsetStart(token.start_ - begin_);
1123     currentValue().setOffsetLimit(token.end_ - begin_);
1124   } break;
1125   case tokenNaN: {
1126     Value v(std::numeric_limits<double>::quiet_NaN());
1127     currentValue().swapPayload(v);
1128     currentValue().setOffsetStart(token.start_ - begin_);
1129     currentValue().setOffsetLimit(token.end_ - begin_);
1130   } break;
1131   case tokenPosInf: {
1132     Value v(std::numeric_limits<double>::infinity());
1133     currentValue().swapPayload(v);
1134     currentValue().setOffsetStart(token.start_ - begin_);
1135     currentValue().setOffsetLimit(token.end_ - begin_);
1136   } break;
1137   case tokenNegInf: {
1138     Value v(-std::numeric_limits<double>::infinity());
1139     currentValue().swapPayload(v);
1140     currentValue().setOffsetStart(token.start_ - begin_);
1141     currentValue().setOffsetLimit(token.end_ - begin_);
1142   } break;
1143   case tokenArraySeparator:
1144   case tokenObjectEnd:
1145   case tokenArrayEnd:
1146     if (features_.allowDroppedNullPlaceholders_) {
1147       // "Un-read" the current token and mark the current value as a null
1148       // token.
1149       current_--;
1150       Value v;
1151       currentValue().swapPayload(v);
1152       currentValue().setOffsetStart(current_ - begin_ - 1);
1153       currentValue().setOffsetLimit(current_ - begin_);
1154       break;
1155     } // else, fall through ...
1156   default:
1157     currentValue().setOffsetStart(token.start_ - begin_);
1158     currentValue().setOffsetLimit(token.end_ - begin_);
1159     return addError("Syntax error: value, object or array expected.", token);
1160   }
1161 
1162   if (collectComments_) {
1163     lastValueEnd_ = current_;
1164     lastValue_ = &currentValue();
1165   }
1166 
1167   return successful;
1168 }
1169 
skipCommentTokens(Token & token)1170 void OurReader::skipCommentTokens(Token& token) {
1171   if (features_.allowComments_) {
1172     do {
1173       readToken(token);
1174     } while (token.type_ == tokenComment);
1175   } else {
1176     readToken(token);
1177   }
1178 }
1179 
readToken(Token & token)1180 bool OurReader::readToken(Token& token) {
1181   skipSpaces();
1182   token.start_ = current_;
1183   Char c = getNextChar();
1184   bool ok = true;
1185   switch (c) {
1186   case '{':
1187     token.type_ = tokenObjectBegin;
1188     break;
1189   case '}':
1190     token.type_ = tokenObjectEnd;
1191     break;
1192   case '[':
1193     token.type_ = tokenArrayBegin;
1194     break;
1195   case ']':
1196     token.type_ = tokenArrayEnd;
1197     break;
1198   case '"':
1199     token.type_ = tokenString;
1200     ok = readString();
1201     break;
1202   case '\'':
1203     if (features_.allowSingleQuotes_) {
1204       token.type_ = tokenString;
1205       ok = readStringSingleQuote();
1206       break;
1207     } // else fall through
1208   case '/':
1209     token.type_ = tokenComment;
1210     ok = readComment();
1211     break;
1212   case '0':
1213   case '1':
1214   case '2':
1215   case '3':
1216   case '4':
1217   case '5':
1218   case '6':
1219   case '7':
1220   case '8':
1221   case '9':
1222     token.type_ = tokenNumber;
1223     readNumber(false);
1224     break;
1225   case '-':
1226     if (readNumber(true)) {
1227       token.type_ = tokenNumber;
1228     } else {
1229       token.type_ = tokenNegInf;
1230       ok = features_.allowSpecialFloats_ && match("nfinity", 7);
1231     }
1232     break;
1233   case 't':
1234     token.type_ = tokenTrue;
1235     ok = match("rue", 3);
1236     break;
1237   case 'f':
1238     token.type_ = tokenFalse;
1239     ok = match("alse", 4);
1240     break;
1241   case 'n':
1242     token.type_ = tokenNull;
1243     ok = match("ull", 3);
1244     break;
1245   case 'N':
1246     if (features_.allowSpecialFloats_) {
1247       token.type_ = tokenNaN;
1248       ok = match("aN", 2);
1249     } else {
1250       ok = false;
1251     }
1252     break;
1253   case 'I':
1254     if (features_.allowSpecialFloats_) {
1255       token.type_ = tokenPosInf;
1256       ok = match("nfinity", 7);
1257     } else {
1258       ok = false;
1259     }
1260     break;
1261   case ',':
1262     token.type_ = tokenArraySeparator;
1263     break;
1264   case ':':
1265     token.type_ = tokenMemberSeparator;
1266     break;
1267   case 0:
1268     token.type_ = tokenEndOfStream;
1269     break;
1270   default:
1271     ok = false;
1272     break;
1273   }
1274   if (!ok)
1275     token.type_ = tokenError;
1276   token.end_ = current_;
1277   return true;
1278 }
1279 
skipSpaces()1280 void OurReader::skipSpaces() {
1281   while (current_ != end_) {
1282     Char c = *current_;
1283     if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
1284       ++current_;
1285     else
1286       break;
1287   }
1288 }
1289 
match(Location pattern,int patternLength)1290 bool OurReader::match(Location pattern, int patternLength) {
1291   if (end_ - current_ < patternLength)
1292     return false;
1293   int index = patternLength;
1294   while (index--)
1295     if (current_[index] != pattern[index])
1296       return false;
1297   current_ += patternLength;
1298   return true;
1299 }
1300 
readComment()1301 bool OurReader::readComment() {
1302   Location commentBegin = current_ - 1;
1303   Char c = getNextChar();
1304   bool successful = false;
1305   if (c == '*')
1306     successful = readCStyleComment();
1307   else if (c == '/')
1308     successful = readCppStyleComment();
1309   if (!successful)
1310     return false;
1311 
1312   if (collectComments_) {
1313     CommentPlacement placement = commentBefore;
1314     if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
1315       if (c != '*' || !containsNewLine(commentBegin, current_))
1316         placement = commentAfterOnSameLine;
1317     }
1318 
1319     addComment(commentBegin, current_, placement);
1320   }
1321   return true;
1322 }
1323 
normalizeEOL(OurReader::Location begin,OurReader::Location end)1324 String OurReader::normalizeEOL(OurReader::Location begin,
1325                                OurReader::Location end) {
1326   String normalized;
1327   normalized.reserve(static_cast<size_t>(end - begin));
1328   OurReader::Location current = begin;
1329   while (current != end) {
1330     char c = *current++;
1331     if (c == '\r') {
1332       if (current != end && *current == '\n')
1333         // convert dos EOL
1334         ++current;
1335       // convert Mac EOL
1336       normalized += '\n';
1337     } else {
1338       normalized += c;
1339     }
1340   }
1341   return normalized;
1342 }
1343 
addComment(Location begin,Location end,CommentPlacement placement)1344 void OurReader::addComment(Location begin,
1345                            Location end,
1346                            CommentPlacement placement) {
1347   assert(collectComments_);
1348   const String& normalized = normalizeEOL(begin, end);
1349   if (placement == commentAfterOnSameLine) {
1350     assert(lastValue_ != nullptr);
1351     lastValue_->setComment(normalized, placement);
1352   } else {
1353     commentsBefore_ += normalized;
1354   }
1355 }
1356 
readCStyleComment()1357 bool OurReader::readCStyleComment() {
1358   while ((current_ + 1) < end_) {
1359     Char c = getNextChar();
1360     if (c == '*' && *current_ == '/')
1361       break;
1362   }
1363   return getNextChar() == '/';
1364 }
1365 
readCppStyleComment()1366 bool OurReader::readCppStyleComment() {
1367   while (current_ != end_) {
1368     Char c = getNextChar();
1369     if (c == '\n')
1370       break;
1371     if (c == '\r') {
1372       // Consume DOS EOL. It will be normalized in addComment.
1373       if (current_ != end_ && *current_ == '\n')
1374         getNextChar();
1375       // Break on Moc OS 9 EOL.
1376       break;
1377     }
1378   }
1379   return true;
1380 }
1381 
readNumber(bool checkInf)1382 bool OurReader::readNumber(bool checkInf) {
1383   const char* p = current_;
1384   if (checkInf && p != end_ && *p == 'I') {
1385     current_ = ++p;
1386     return false;
1387   }
1388   char c = '0'; // stopgap for already consumed character
1389   // integral part
1390   while (c >= '0' && c <= '9')
1391     c = (current_ = p) < end_ ? *p++ : '\0';
1392   // fractional part
1393   if (c == '.') {
1394     c = (current_ = p) < end_ ? *p++ : '\0';
1395     while (c >= '0' && c <= '9')
1396       c = (current_ = p) < end_ ? *p++ : '\0';
1397   }
1398   // exponential part
1399   if (c == 'e' || c == 'E') {
1400     c = (current_ = p) < end_ ? *p++ : '\0';
1401     if (c == '+' || c == '-')
1402       c = (current_ = p) < end_ ? *p++ : '\0';
1403     while (c >= '0' && c <= '9')
1404       c = (current_ = p) < end_ ? *p++ : '\0';
1405   }
1406   return true;
1407 }
readString()1408 bool OurReader::readString() {
1409   Char c = 0;
1410   while (current_ != end_) {
1411     c = getNextChar();
1412     if (c == '\\')
1413       getNextChar();
1414     else if (c == '"')
1415       break;
1416   }
1417   return c == '"';
1418 }
1419 
readStringSingleQuote()1420 bool OurReader::readStringSingleQuote() {
1421   Char c = 0;
1422   while (current_ != end_) {
1423     c = getNextChar();
1424     if (c == '\\')
1425       getNextChar();
1426     else if (c == '\'')
1427       break;
1428   }
1429   return c == '\'';
1430 }
1431 
readObject(Token & token)1432 bool OurReader::readObject(Token& token) {
1433   Token tokenName;
1434   String name;
1435   Value init(objectValue);
1436   currentValue().swapPayload(init);
1437   currentValue().setOffsetStart(token.start_ - begin_);
1438   while (readToken(tokenName)) {
1439     bool initialTokenOk = true;
1440     while (tokenName.type_ == tokenComment && initialTokenOk)
1441       initialTokenOk = readToken(tokenName);
1442     if (!initialTokenOk)
1443       break;
1444     if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object
1445       return true;
1446     name.clear();
1447     if (tokenName.type_ == tokenString) {
1448       if (!decodeString(tokenName, name))
1449         return recoverFromError(tokenObjectEnd);
1450     } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
1451       Value numberName;
1452       if (!decodeNumber(tokenName, numberName))
1453         return recoverFromError(tokenObjectEnd);
1454       name = numberName.asString();
1455     } else {
1456       break;
1457     }
1458     if (name.length() >= (1U << 30))
1459       throwRuntimeError("keylength >= 2^30");
1460     if (features_.rejectDupKeys_ && currentValue().isMember(name)) {
1461       String msg = "Duplicate key: '" + name + "'";
1462       return addErrorAndRecover(msg, tokenName, tokenObjectEnd);
1463     }
1464 
1465     Token colon;
1466     if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
1467       return addErrorAndRecover("Missing ':' after object member name", colon,
1468                                 tokenObjectEnd);
1469     }
1470     Value& value = currentValue()[name];
1471     nodes_.push(&value);
1472     bool ok = readValue();
1473     nodes_.pop();
1474     if (!ok) // error already set
1475       return recoverFromError(tokenObjectEnd);
1476 
1477     Token comma;
1478     if (!readToken(comma) ||
1479         (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
1480          comma.type_ != tokenComment)) {
1481       return addErrorAndRecover("Missing ',' or '}' in object declaration",
1482                                 comma, tokenObjectEnd);
1483     }
1484     bool finalizeTokenOk = true;
1485     while (comma.type_ == tokenComment && finalizeTokenOk)
1486       finalizeTokenOk = readToken(comma);
1487     if (comma.type_ == tokenObjectEnd)
1488       return true;
1489   }
1490   return addErrorAndRecover("Missing '}' or object member name", tokenName,
1491                             tokenObjectEnd);
1492 }
1493 
readArray(Token & token)1494 bool OurReader::readArray(Token& token) {
1495   Value init(arrayValue);
1496   currentValue().swapPayload(init);
1497   currentValue().setOffsetStart(token.start_ - begin_);
1498   skipSpaces();
1499   if (current_ != end_ && *current_ == ']') // empty array
1500   {
1501     Token endArray;
1502     readToken(endArray);
1503     return true;
1504   }
1505   int index = 0;
1506   for (;;) {
1507     Value& value = currentValue()[index++];
1508     nodes_.push(&value);
1509     bool ok = readValue();
1510     nodes_.pop();
1511     if (!ok) // error already set
1512       return recoverFromError(tokenArrayEnd);
1513 
1514     Token currentToken;
1515     // Accept Comment after last item in the array.
1516     ok = readToken(currentToken);
1517     while (currentToken.type_ == tokenComment && ok) {
1518       ok = readToken(currentToken);
1519     }
1520     bool badTokenType = (currentToken.type_ != tokenArraySeparator &&
1521                          currentToken.type_ != tokenArrayEnd);
1522     if (!ok || badTokenType) {
1523       return addErrorAndRecover("Missing ',' or ']' in array declaration",
1524                                 currentToken, tokenArrayEnd);
1525     }
1526     if (currentToken.type_ == tokenArrayEnd)
1527       break;
1528   }
1529   return true;
1530 }
1531 
decodeNumber(Token & token)1532 bool OurReader::decodeNumber(Token& token) {
1533   Value decoded;
1534   if (!decodeNumber(token, decoded))
1535     return false;
1536   currentValue().swapPayload(decoded);
1537   currentValue().setOffsetStart(token.start_ - begin_);
1538   currentValue().setOffsetLimit(token.end_ - begin_);
1539   return true;
1540 }
1541 
decodeNumber(Token & token,Value & decoded)1542 bool OurReader::decodeNumber(Token& token, Value& decoded) {
1543   // Attempts to parse the number as an integer. If the number is
1544   // larger than the maximum supported value of an integer then
1545   // we decode the number as a double.
1546   Location current = token.start_;
1547   bool isNegative = *current == '-';
1548   if (isNegative)
1549     ++current;
1550 
1551   // TODO(issue #960): Change to constexpr
1552   static const auto positive_threshold = Value::maxLargestUInt / 10;
1553   static const auto positive_last_digit = Value::maxLargestUInt % 10;
1554   static const auto negative_threshold =
1555       Value::LargestUInt(Value::minLargestInt) / 10;
1556   static const auto negative_last_digit =
1557       Value::LargestUInt(Value::minLargestInt) % 10;
1558 
1559   const auto threshold = isNegative ? negative_threshold : positive_threshold;
1560   const auto last_digit =
1561       isNegative ? negative_last_digit : positive_last_digit;
1562 
1563   Value::LargestUInt value = 0;
1564   while (current < token.end_) {
1565     Char c = *current++;
1566     if (c < '0' || c > '9')
1567       return decodeDouble(token, decoded);
1568 
1569     const auto digit(static_cast<Value::UInt>(c - '0'));
1570     if (value >= threshold) {
1571       // We've hit or exceeded the max value divided by 10 (rounded down). If
1572       // a) we've only just touched the limit, meaing value == threshold,
1573       // b) this is the last digit, or
1574       // c) it's small enough to fit in that rounding delta, we're okay.
1575       // Otherwise treat this number as a double to avoid overflow.
1576       if (value > threshold || current != token.end_ || digit > last_digit) {
1577         return decodeDouble(token, decoded);
1578       }
1579     }
1580     value = value * 10 + digit;
1581   }
1582 
1583   if (isNegative)
1584     decoded = -Value::LargestInt(value);
1585   else if (value <= Value::LargestUInt(Value::maxLargestInt))
1586     decoded = Value::LargestInt(value);
1587   else
1588     decoded = value;
1589 
1590   return true;
1591 }
1592 
decodeDouble(Token & token)1593 bool OurReader::decodeDouble(Token& token) {
1594   Value decoded;
1595   if (!decodeDouble(token, decoded))
1596     return false;
1597   currentValue().swapPayload(decoded);
1598   currentValue().setOffsetStart(token.start_ - begin_);
1599   currentValue().setOffsetLimit(token.end_ - begin_);
1600   return true;
1601 }
1602 
decodeDouble(Token & token,Value & decoded)1603 bool OurReader::decodeDouble(Token& token, Value& decoded) {
1604   double value = 0;
1605   const int bufferSize = 32;
1606   int count;
1607   ptrdiff_t const length = token.end_ - token.start_;
1608 
1609   // Sanity check to avoid buffer overflow exploits.
1610   if (length < 0) {
1611     return addError("Unable to parse token length", token);
1612   }
1613   auto const ulength = static_cast<size_t>(length);
1614 
1615   // Avoid using a string constant for the format control string given to
1616   // sscanf, as this can cause hard to debug crashes on OS X. See here for more
1617   // info:
1618   //
1619   //     http://developer.apple.com/library/mac/#DOCUMENTATION/DeveloperTools/gcc-4.0.1/gcc/Incompatibilities.html
1620   char format[] = "%lf";
1621 
1622   if (length <= bufferSize) {
1623     Char buffer[bufferSize + 1];
1624     memcpy(buffer, token.start_, ulength);
1625     buffer[length] = 0;
1626     fixNumericLocaleInput(buffer, buffer + length);
1627     count = sscanf(buffer, format, &value);
1628   } else {
1629     String buffer(token.start_, token.end_);
1630     count = sscanf(buffer.c_str(), format, &value);
1631   }
1632 
1633   if (count != 1)
1634     return addError(
1635         "'" + String(token.start_, token.end_) + "' is not a number.", token);
1636   decoded = value;
1637   return true;
1638 }
1639 
decodeString(Token & token)1640 bool OurReader::decodeString(Token& token) {
1641   String decoded_string;
1642   if (!decodeString(token, decoded_string))
1643     return false;
1644   Value decoded(decoded_string);
1645   currentValue().swapPayload(decoded);
1646   currentValue().setOffsetStart(token.start_ - begin_);
1647   currentValue().setOffsetLimit(token.end_ - begin_);
1648   return true;
1649 }
1650 
decodeString(Token & token,String & decoded)1651 bool OurReader::decodeString(Token& token, String& decoded) {
1652   decoded.reserve(static_cast<size_t>(token.end_ - token.start_ - 2));
1653   Location current = token.start_ + 1; // skip '"'
1654   Location end = token.end_ - 1;       // do not include '"'
1655   while (current != end) {
1656     Char c = *current++;
1657     if (c == '"')
1658       break;
1659     else if (c == '\\') {
1660       if (current == end)
1661         return addError("Empty escape sequence in string", token, current);
1662       Char escape = *current++;
1663       switch (escape) {
1664       case '"':
1665         decoded += '"';
1666         break;
1667       case '/':
1668         decoded += '/';
1669         break;
1670       case '\\':
1671         decoded += '\\';
1672         break;
1673       case 'b':
1674         decoded += '\b';
1675         break;
1676       case 'f':
1677         decoded += '\f';
1678         break;
1679       case 'n':
1680         decoded += '\n';
1681         break;
1682       case 'r':
1683         decoded += '\r';
1684         break;
1685       case 't':
1686         decoded += '\t';
1687         break;
1688       case 'u': {
1689         unsigned int unicode;
1690         if (!decodeUnicodeCodePoint(token, current, end, unicode))
1691           return false;
1692         decoded += codePointToUTF8(unicode);
1693       } break;
1694       default:
1695         return addError("Bad escape sequence in string", token, current);
1696       }
1697     } else {
1698       decoded += c;
1699     }
1700   }
1701   return true;
1702 }
1703 
decodeUnicodeCodePoint(Token & token,Location & current,Location end,unsigned int & unicode)1704 bool OurReader::decodeUnicodeCodePoint(Token& token,
1705                                        Location& current,
1706                                        Location end,
1707                                        unsigned int& unicode) {
1708 
1709   if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
1710     return false;
1711   if (unicode >= 0xD800 && unicode <= 0xDBFF) {
1712     // surrogate pairs
1713     if (end - current < 6)
1714       return addError(
1715           "additional six characters expected to parse unicode surrogate pair.",
1716           token, current);
1717     if (*(current++) == '\\' && *(current++) == 'u') {
1718       unsigned int surrogatePair;
1719       if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
1720         unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
1721       } else
1722         return false;
1723     } else
1724       return addError("expecting another \\u token to begin the second half of "
1725                       "a unicode surrogate pair",
1726                       token, current);
1727   }
1728   return true;
1729 }
1730 
decodeUnicodeEscapeSequence(Token & token,Location & current,Location end,unsigned int & ret_unicode)1731 bool OurReader::decodeUnicodeEscapeSequence(Token& token,
1732                                             Location& current,
1733                                             Location end,
1734                                             unsigned int& ret_unicode) {
1735   if (end - current < 4)
1736     return addError(
1737         "Bad unicode escape sequence in string: four digits expected.", token,
1738         current);
1739   int unicode = 0;
1740   for (int index = 0; index < 4; ++index) {
1741     Char c = *current++;
1742     unicode *= 16;
1743     if (c >= '0' && c <= '9')
1744       unicode += c - '0';
1745     else if (c >= 'a' && c <= 'f')
1746       unicode += c - 'a' + 10;
1747     else if (c >= 'A' && c <= 'F')
1748       unicode += c - 'A' + 10;
1749     else
1750       return addError(
1751           "Bad unicode escape sequence in string: hexadecimal digit expected.",
1752           token, current);
1753   }
1754   ret_unicode = static_cast<unsigned int>(unicode);
1755   return true;
1756 }
1757 
addError(const String & message,Token & token,Location extra)1758 bool OurReader::addError(const String& message, Token& token, Location extra) {
1759   ErrorInfo info;
1760   info.token_ = token;
1761   info.message_ = message;
1762   info.extra_ = extra;
1763   errors_.push_back(info);
1764   return false;
1765 }
1766 
recoverFromError(TokenType skipUntilToken)1767 bool OurReader::recoverFromError(TokenType skipUntilToken) {
1768   size_t errorCount = errors_.size();
1769   Token skip;
1770   for (;;) {
1771     if (!readToken(skip))
1772       errors_.resize(errorCount); // discard errors caused by recovery
1773     if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
1774       break;
1775   }
1776   errors_.resize(errorCount);
1777   return false;
1778 }
1779 
addErrorAndRecover(const String & message,Token & token,TokenType skipUntilToken)1780 bool OurReader::addErrorAndRecover(const String& message,
1781                                    Token& token,
1782                                    TokenType skipUntilToken) {
1783   addError(message, token);
1784   return recoverFromError(skipUntilToken);
1785 }
1786 
currentValue()1787 Value& OurReader::currentValue() { return *(nodes_.top()); }
1788 
getNextChar()1789 OurReader::Char OurReader::getNextChar() {
1790   if (current_ == end_)
1791     return 0;
1792   return *current_++;
1793 }
1794 
getLocationLineAndColumn(Location location,int & line,int & column) const1795 void OurReader::getLocationLineAndColumn(Location location,
1796                                          int& line,
1797                                          int& column) const {
1798   Location current = begin_;
1799   Location lastLineStart = current;
1800   line = 0;
1801   while (current < location && current != end_) {
1802     Char c = *current++;
1803     if (c == '\r') {
1804       if (*current == '\n')
1805         ++current;
1806       lastLineStart = current;
1807       ++line;
1808     } else if (c == '\n') {
1809       lastLineStart = current;
1810       ++line;
1811     }
1812   }
1813   // column & line start at 1
1814   column = int(location - lastLineStart) + 1;
1815   ++line;
1816 }
1817 
getLocationLineAndColumn(Location location) const1818 String OurReader::getLocationLineAndColumn(Location location) const {
1819   int line, column;
1820   getLocationLineAndColumn(location, line, column);
1821   char buffer[18 + 16 + 16 + 1];
1822   jsoncpp_snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
1823   return buffer;
1824 }
1825 
getFormattedErrorMessages() const1826 String OurReader::getFormattedErrorMessages() const {
1827   String formattedMessage;
1828   for (const auto& error : errors_) {
1829     formattedMessage +=
1830         "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
1831     formattedMessage += "  " + error.message_ + "\n";
1832     if (error.extra_)
1833       formattedMessage +=
1834           "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
1835   }
1836   return formattedMessage;
1837 }
1838 
getStructuredErrors() const1839 std::vector<OurReader::StructuredError> OurReader::getStructuredErrors() const {
1840   std::vector<OurReader::StructuredError> allErrors;
1841   for (const auto& error : errors_) {
1842     OurReader::StructuredError structured;
1843     structured.offset_start = error.token_.start_ - begin_;
1844     structured.offset_limit = error.token_.end_ - begin_;
1845     structured.message = error.message_;
1846     allErrors.push_back(structured);
1847   }
1848   return allErrors;
1849 }
1850 
pushError(const Value & value,const String & message)1851 bool OurReader::pushError(const Value& value, const String& message) {
1852   ptrdiff_t length = end_ - begin_;
1853   if (value.getOffsetStart() > length || value.getOffsetLimit() > length)
1854     return false;
1855   Token token;
1856   token.type_ = tokenError;
1857   token.start_ = begin_ + value.getOffsetStart();
1858   token.end_ = begin_ + value.getOffsetLimit();
1859   ErrorInfo info;
1860   info.token_ = token;
1861   info.message_ = message;
1862   info.extra_ = nullptr;
1863   errors_.push_back(info);
1864   return true;
1865 }
1866 
pushError(const Value & value,const String & message,const Value & extra)1867 bool OurReader::pushError(const Value& value,
1868                           const String& message,
1869                           const Value& extra) {
1870   ptrdiff_t length = end_ - begin_;
1871   if (value.getOffsetStart() > length || value.getOffsetLimit() > length ||
1872       extra.getOffsetLimit() > length)
1873     return false;
1874   Token token;
1875   token.type_ = tokenError;
1876   token.start_ = begin_ + value.getOffsetStart();
1877   token.end_ = begin_ + value.getOffsetLimit();
1878   ErrorInfo info;
1879   info.token_ = token;
1880   info.message_ = message;
1881   info.extra_ = begin_ + extra.getOffsetStart();
1882   errors_.push_back(info);
1883   return true;
1884 }
1885 
good() const1886 bool OurReader::good() const { return errors_.empty(); }
1887 
1888 class OurCharReader : public CharReader {
1889   bool const collectComments_;
1890   OurReader reader_;
1891 
1892 public:
OurCharReader(bool collectComments,OurFeatures const & features)1893   OurCharReader(bool collectComments, OurFeatures const& features)
1894       : collectComments_(collectComments), reader_(features) {}
parse(char const * beginDoc,char const * endDoc,Value * root,String * errs)1895   bool parse(char const* beginDoc,
1896              char const* endDoc,
1897              Value* root,
1898              String* errs) override {
1899     bool ok = reader_.parse(beginDoc, endDoc, *root, collectComments_);
1900     if (errs) {
1901       *errs = reader_.getFormattedErrorMessages();
1902     }
1903     return ok;
1904   }
1905 };
1906 
CharReaderBuilder()1907 CharReaderBuilder::CharReaderBuilder() { setDefaults(&settings_); }
1908 CharReaderBuilder::~CharReaderBuilder() = default;
newCharReader() const1909 CharReader* CharReaderBuilder::newCharReader() const {
1910   bool collectComments = settings_["collectComments"].asBool();
1911   OurFeatures features = OurFeatures::all();
1912   features.allowComments_ = settings_["allowComments"].asBool();
1913   features.strictRoot_ = settings_["strictRoot"].asBool();
1914   features.allowDroppedNullPlaceholders_ =
1915       settings_["allowDroppedNullPlaceholders"].asBool();
1916   features.allowNumericKeys_ = settings_["allowNumericKeys"].asBool();
1917   features.allowSingleQuotes_ = settings_["allowSingleQuotes"].asBool();
1918 
1919   // Stack limit is always a size_t, so we get this as an unsigned int
1920   // regardless of it we have 64-bit integer support enabled.
1921   features.stackLimit_ = static_cast<size_t>(settings_["stackLimit"].asUInt());
1922   features.failIfExtra_ = settings_["failIfExtra"].asBool();
1923   features.rejectDupKeys_ = settings_["rejectDupKeys"].asBool();
1924   features.allowSpecialFloats_ = settings_["allowSpecialFloats"].asBool();
1925   return new OurCharReader(collectComments, features);
1926 }
getValidReaderKeys(std::set<String> * valid_keys)1927 static void getValidReaderKeys(std::set<String>* valid_keys) {
1928   valid_keys->clear();
1929   valid_keys->insert("collectComments");
1930   valid_keys->insert("allowComments");
1931   valid_keys->insert("strictRoot");
1932   valid_keys->insert("allowDroppedNullPlaceholders");
1933   valid_keys->insert("allowNumericKeys");
1934   valid_keys->insert("allowSingleQuotes");
1935   valid_keys->insert("stackLimit");
1936   valid_keys->insert("failIfExtra");
1937   valid_keys->insert("rejectDupKeys");
1938   valid_keys->insert("allowSpecialFloats");
1939 }
validate(Json::Value * invalid) const1940 bool CharReaderBuilder::validate(Json::Value* invalid) const {
1941   Json::Value my_invalid;
1942   if (!invalid)
1943     invalid = &my_invalid; // so we do not need to test for NULL
1944   Json::Value& inv = *invalid;
1945   std::set<String> valid_keys;
1946   getValidReaderKeys(&valid_keys);
1947   Value::Members keys = settings_.getMemberNames();
1948   size_t n = keys.size();
1949   for (size_t i = 0; i < n; ++i) {
1950     String const& key = keys[i];
1951     if (valid_keys.find(key) == valid_keys.end()) {
1952       inv[key] = settings_[key];
1953     }
1954   }
1955   return inv.empty();
1956 }
operator [](const String & key)1957 Value& CharReaderBuilder::operator[](const String& key) {
1958   return settings_[key];
1959 }
1960 // static
strictMode(Json::Value * settings)1961 void CharReaderBuilder::strictMode(Json::Value* settings) {
1962   //! [CharReaderBuilderStrictMode]
1963   (*settings)["allowComments"] = false;
1964   (*settings)["strictRoot"] = true;
1965   (*settings)["allowDroppedNullPlaceholders"] = false;
1966   (*settings)["allowNumericKeys"] = false;
1967   (*settings)["allowSingleQuotes"] = false;
1968   (*settings)["stackLimit"] = 1000;
1969   (*settings)["failIfExtra"] = true;
1970   (*settings)["rejectDupKeys"] = true;
1971   (*settings)["allowSpecialFloats"] = false;
1972   //! [CharReaderBuilderStrictMode]
1973 }
1974 // static
setDefaults(Json::Value * settings)1975 void CharReaderBuilder::setDefaults(Json::Value* settings) {
1976   //! [CharReaderBuilderDefaults]
1977   (*settings)["collectComments"] = true;
1978   (*settings)["allowComments"] = true;
1979   (*settings)["strictRoot"] = false;
1980   (*settings)["allowDroppedNullPlaceholders"] = false;
1981   (*settings)["allowNumericKeys"] = false;
1982   (*settings)["allowSingleQuotes"] = false;
1983   (*settings)["stackLimit"] = 1000;
1984   (*settings)["failIfExtra"] = false;
1985   (*settings)["rejectDupKeys"] = false;
1986   (*settings)["allowSpecialFloats"] = false;
1987   //! [CharReaderBuilderDefaults]
1988 }
1989 
1990 //////////////////////////////////
1991 // global functions
1992 
parseFromStream(CharReader::Factory const & fact,IStream & sin,Value * root,String * errs)1993 bool parseFromStream(CharReader::Factory const& fact,
1994                      IStream& sin,
1995                      Value* root,
1996                      String* errs) {
1997   OStringStream ssin;
1998   ssin << sin.rdbuf();
1999   String doc = ssin.str();
2000   char const* begin = doc.data();
2001   char const* end = begin + doc.size();
2002   // Note that we do not actually need a null-terminator.
2003   CharReaderPtr const reader(fact.newCharReader());
2004   return reader->parse(begin, end, root, errs);
2005 }
2006 
operator >>(IStream & sin,Value & root)2007 IStream& operator>>(IStream& sin, Value& root) {
2008   CharReaderBuilder b;
2009   String errs;
2010   bool ok = parseFromStream(b, sin, &root, &errs);
2011   if (!ok) {
2012     throwRuntimeError(errs);
2013   }
2014   return sin;
2015 }
2016 
2017 } // namespace Json
2018