• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2007-2011 Baptiste Lepilleur and The JsonCpp Authors
2 // Copyright (C) 2016 InfoTeCS JSC. All rights reserved.
3 // Distributed under MIT license, or public domain if desired and
4 // recognized in your jurisdiction.
5 // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE
6 
7 #if !defined(JSON_IS_AMALGAMATION)
8 #include "json_tool.h"
9 #include <json/assertions.h>
10 #include <json/reader.h>
11 #include <json/value.h>
12 #endif // if !defined(JSON_IS_AMALGAMATION)
#include <cassert>
#include <cstring>
#include <iostream>
#include <istream>
#include <iterator>
#include <limits>
#include <memory>
#include <set>
#include <sstream>
#include <utility>
22 
23 #include <cstdio>
24 #if __cplusplus >= 201103L
25 
26 #if !defined(sscanf)
27 #define sscanf std::sscanf
28 #endif
29 
30 #endif //__cplusplus
31 
32 #if defined(_MSC_VER)
33 #if !defined(_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES)
34 #define _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES 1
35 #endif //_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES
36 #endif //_MSC_VER
37 
38 #if defined(_MSC_VER)
39 // Disable warning about strdup being deprecated.
40 #pragma warning(disable : 4996)
41 #endif
42 
43 // Define JSONCPP_DEPRECATED_STACK_LIMIT as an appropriate integer at compile
44 // time to change the stack limit
45 #if !defined(JSONCPP_DEPRECATED_STACK_LIMIT)
46 #define JSONCPP_DEPRECATED_STACK_LIMIT 1000
47 #endif
48 
49 static size_t const stackLimit_g =
50     JSONCPP_DEPRECATED_STACK_LIMIT; // see readValue()
51 
52 namespace Json {
53 
// Plain-pointer alias for CharReader.
typedef CharReader* CharReaderPtr;
55 
56 // Implementation of class Features
57 // ////////////////////////////////
58 
Features()59 Features::Features()
60     : allowComments_(true), strictRoot_(false),
61       allowDroppedNullPlaceholders_(false), allowNumericKeys_(false) {}
all()62 Features Features::all() { return Features(); }
63 
strictMode()64 Features Features::strictMode() {
65   Features features;
66   features.allowComments_ = false;
67   features.strictRoot_ = true;
68   features.allowDroppedNullPlaceholders_ = false;
69   features.allowNumericKeys_ = false;
70   return features;
71 }
72 
73 // Implementation of class Reader
74 // ////////////////////////////////
75 
containsNewLine(Reader::Location begin,Reader::Location end)76 bool Reader::containsNewLine(Reader::Location begin, Reader::Location end) {
77   for (; begin < end; ++begin)
78     if (*begin == '\n' || *begin == '\r')
79       return true;
80   return false;
81 }
82 
83 // Class Reader
84 // //////////////////////////////////////////////////////////////////
85 
Reader()86 Reader::Reader()
87     : errors_(), document_(), begin_(), end_(), current_(), lastValueEnd_(),
88       lastValue_(), commentsBefore_(), features_(Features::all()),
89       collectComments_() {}
90 
Reader(const Features & features)91 Reader::Reader(const Features& features)
92     : errors_(), document_(), begin_(), end_(), current_(), lastValueEnd_(),
93       lastValue_(), commentsBefore_(), features_(features), collectComments_() {
94 }
95 
parse(const std::string & document,Value & root,bool collectComments)96 bool Reader::parse(const std::string& document, Value& root,
97                    bool collectComments) {
98   document_.assign(document.begin(), document.end());
99   const char* begin = document_.c_str();
100   const char* end = begin + document_.length();
101   return parse(begin, end, root, collectComments);
102 }
103 
parse(std::istream & is,Value & root,bool collectComments)104 bool Reader::parse(std::istream& is, Value& root, bool collectComments) {
105   // std::istream_iterator<char> begin(is);
106   // std::istream_iterator<char> end;
107   // Those would allow streamed input from a file, if parse() were a
108   // template function.
109 
110   // Since String is reference-counted, this at least does not
111   // create an extra copy.
112   String doc;
113   std::getline(is, doc, static_cast<char> EOF);
114   return parse(doc.data(), doc.data() + doc.size(), root, collectComments);
115 }
116 
parse(const char * beginDoc,const char * endDoc,Value & root,bool collectComments)117 bool Reader::parse(const char* beginDoc, const char* endDoc, Value& root,
118                    bool collectComments) {
119   if (!features_.allowComments_) {
120     collectComments = false;
121   }
122 
123   begin_ = beginDoc;
124   end_ = endDoc;
125   collectComments_ = collectComments;
126   current_ = begin_;
127   lastValueEnd_ = JSONCPP_NULL;
128   lastValue_ = JSONCPP_NULL;
129   commentsBefore_.clear();
130   errors_.clear();
131   while (!nodes_.empty())
132     nodes_.pop();
133   nodes_.push(&root);
134 
135   bool successful = readValue();
136   Token token;
137   skipCommentTokens(token);
138   if (collectComments_ && !commentsBefore_.empty())
139     root.setComment(commentsBefore_, commentAfter);
140   if (features_.strictRoot_) {
141     if (!root.isArray() && !root.isObject()) {
142       // Set error location to start of doc, ideally should be first token found
143       // in doc
144       token.type_ = tokenError;
145       token.start_ = beginDoc;
146       token.end_ = endDoc;
147       addError(
148           "A valid JSON document must be either an array or an object value.",
149           token);
150       return false;
151     }
152   }
153   return successful;
154 }
155 
// Reads the next value from the input and stores it in currentValue().
//
// Comment tokens in front of the value are skipped (and attached to the value
// as commentBefore when comments are being collected). Returns false when the
// value could not be read; the switch's default branch records a syntax error.
bool Reader::readValue() {
  // readValue() recurses only through readObject() or readArray(). Both
  // execute nodes_.push() just before and nodes_.pop() just after calling
  // readValue(), and parse() performs one nodes_.push() of its own, hence the
  // comparison uses > rather than >=.
  if (nodes_.size() > stackLimit_g)
    throwRuntimeError("Exceeded stackLimit in readValue().");

  Token token;
  skipCommentTokens(token);
  bool successful = true;

  // Comments gathered so far belong in front of the value being read.
  if (collectComments_ && !commentsBefore_.empty()) {
    currentValue().setComment(commentsBefore_, commentBefore);
    commentsBefore_.clear();
  }

  switch (token.type_) {
  case tokenObjectBegin:
    successful = readObject(token);
    currentValue().setOffsetLimit(current_ - begin_);
    break;
  case tokenArrayBegin:
    successful = readArray(token);
    currentValue().setOffsetLimit(current_ - begin_);
    break;
  case tokenNumber:
    successful = decodeNumber(token);
    break;
  case tokenString:
    successful = decodeString(token);
    break;
  case tokenTrue: {
    Value v(true);
    currentValue().swapPayload(v);
    currentValue().setOffsetStart(token.start_ - begin_);
    currentValue().setOffsetLimit(token.end_ - begin_);
  } break;
  case tokenFalse: {
    Value v(false);
    currentValue().swapPayload(v);
    currentValue().setOffsetStart(token.start_ - begin_);
    currentValue().setOffsetLimit(token.end_ - begin_);
  } break;
  case tokenNull: {
    Value v;
    currentValue().swapPayload(v);
    currentValue().setOffsetStart(token.start_ - begin_);
    currentValue().setOffsetLimit(token.end_ - begin_);
  } break;
  case tokenArraySeparator:
  case tokenObjectEnd:
  case tokenArrayEnd:
    if (features_.allowDroppedNullPlaceholders_) {
      // "Un-read" the one-character token by stepping current_ back, and
      // store a default-constructed Value in its place.
      current_--;
      Value v;
      currentValue().swapPayload(v);
      currentValue().setOffsetStart(current_ - begin_ - 1);
      currentValue().setOffsetLimit(current_ - begin_);
      break;
    } // Otherwise fall through to the error branch.
  default:
    currentValue().setOffsetStart(token.start_ - begin_);
    currentValue().setOffsetLimit(token.end_ - begin_);
    return addError("Syntax error: value, object or array expected.", token);
  }

  // Remember where this value ended; readComment() uses it to decide whether
  // a following comment sits on the same line as the value.
  if (collectComments_) {
    lastValueEnd_ = current_;
    lastValue_ = &currentValue();
  }

  return successful;
}
232 
skipCommentTokens(Token & token)233 void Reader::skipCommentTokens(Token& token) {
234   if (features_.allowComments_) {
235     do {
236       readToken(token);
237     } while (token.type_ == tokenComment);
238   } else {
239     readToken(token);
240   }
241 }
242 
readToken(Token & token)243 bool Reader::readToken(Token& token) {
244   skipSpaces();
245   token.start_ = current_;
246   Char c = getNextChar();
247   bool ok = true;
248   switch (c) {
249   case '{':
250     token.type_ = tokenObjectBegin;
251     break;
252   case '}':
253     token.type_ = tokenObjectEnd;
254     break;
255   case '[':
256     token.type_ = tokenArrayBegin;
257     break;
258   case ']':
259     token.type_ = tokenArrayEnd;
260     break;
261   case '"':
262     token.type_ = tokenString;
263     ok = readString();
264     break;
265   case '/':
266     token.type_ = tokenComment;
267     ok = readComment();
268     break;
269   case '0':
270   case '1':
271   case '2':
272   case '3':
273   case '4':
274   case '5':
275   case '6':
276   case '7':
277   case '8':
278   case '9':
279   case '-':
280     token.type_ = tokenNumber;
281     readNumber();
282     break;
283   case 't':
284     token.type_ = tokenTrue;
285     ok = match("rue", 3);
286     break;
287   case 'f':
288     token.type_ = tokenFalse;
289     ok = match("alse", 4);
290     break;
291   case 'n':
292     token.type_ = tokenNull;
293     ok = match("ull", 3);
294     break;
295   case ',':
296     token.type_ = tokenArraySeparator;
297     break;
298   case ':':
299     token.type_ = tokenMemberSeparator;
300     break;
301   case 0:
302     token.type_ = tokenEndOfStream;
303     break;
304   default:
305     ok = false;
306     break;
307   }
308   if (!ok)
309     token.type_ = tokenError;
310   token.end_ = current_;
311   return ok;
312 }
313 
skipSpaces()314 void Reader::skipSpaces() {
315   while (current_ != end_) {
316     Char c = *current_;
317     if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
318       ++current_;
319     else
320       break;
321   }
322 }
323 
match(const Char * pattern,int patternLength)324 bool Reader::match(const Char* pattern, int patternLength) {
325   if (end_ - current_ < patternLength)
326     return false;
327   int index = patternLength;
328   while (index--)
329     if (current_[index] != pattern[index])
330       return false;
331   current_ += patternLength;
332   return true;
333 }
334 
readComment()335 bool Reader::readComment() {
336   Location commentBegin = current_ - 1;
337   Char c = getNextChar();
338   bool successful = false;
339   if (c == '*')
340     successful = readCStyleComment();
341   else if (c == '/')
342     successful = readCppStyleComment();
343   if (!successful)
344     return false;
345 
346   if (collectComments_) {
347     CommentPlacement placement = commentBefore;
348     if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
349       if (c != '*' || !containsNewLine(commentBegin, current_))
350         placement = commentAfterOnSameLine;
351     }
352 
353     addComment(commentBegin, current_, placement);
354   }
355   return true;
356 }
357 
normalizeEOL(Reader::Location begin,Reader::Location end)358 String Reader::normalizeEOL(Reader::Location begin, Reader::Location end) {
359   String normalized;
360   normalized.reserve(static_cast<size_t>(end - begin));
361   Reader::Location current = begin;
362   while (current != end) {
363     char c = *current++;
364     if (c == '\r') {
365       if (current != end && *current == '\n')
366         // convert dos EOL
367         ++current;
368       // convert Mac EOL
369       normalized += '\n';
370     } else {
371       normalized += c;
372     }
373   }
374   return normalized;
375 }
376 
addComment(Location begin,Location end,CommentPlacement placement)377 void Reader::addComment(Location begin, Location end,
378                         CommentPlacement placement) {
379   assert(collectComments_);
380   const String& normalized = normalizeEOL(begin, end);
381   if (placement == commentAfterOnSameLine) {
382     assert(lastValue_ != JSONCPP_NULL);
383     lastValue_->setComment(normalized, placement);
384   } else {
385     commentsBefore_ += normalized;
386   }
387 }
388 
readCStyleComment()389 bool Reader::readCStyleComment() {
390   while ((current_ + 1) < end_) {
391     Char c = getNextChar();
392     if (c == '*' && *current_ == '/')
393       break;
394   }
395   return getNextChar() == '/';
396 }
397 
readCppStyleComment()398 bool Reader::readCppStyleComment() {
399   while (current_ != end_) {
400     Char c = getNextChar();
401     if (c == '\n')
402       break;
403     if (c == '\r') {
404       // Consume DOS EOL. It will be normalized in addComment.
405       if (current_ != end_ && *current_ == '\n')
406         getNextChar();
407       // Break on Moc OS 9 EOL.
408       break;
409     }
410   }
411   return true;
412 }
413 
// Advances current_ past the remainder of a number token; the first character
// of the token has already been consumed by readToken().
//
// Every step below uses the same idiom:
//   c = (current_ = p) < end_ ? *p++ : '\0';
// It first commits the look-ahead position p to current_, then, if input
// remains, loads the next character into c and advances p. As a result
// current_ is left on the first character that is not part of the number.
void Reader::readNumber() {
  Location p = current_;
  char c = '0'; // stopgap for already consumed character
  // integral part
  while (c >= '0' && c <= '9')
    c = (current_ = p) < end_ ? *p++ : '\0';
  // fractional part
  if (c == '.') {
    c = (current_ = p) < end_ ? *p++ : '\0';
    while (c >= '0' && c <= '9')
      c = (current_ = p) < end_ ? *p++ : '\0';
  }
  // exponential part, with an optional sign after the 'e' / 'E'
  if (c == 'e' || c == 'E') {
    c = (current_ = p) < end_ ? *p++ : '\0';
    if (c == '+' || c == '-')
      c = (current_ = p) < end_ ? *p++ : '\0';
    while (c >= '0' && c <= '9')
      c = (current_ = p) < end_ ? *p++ : '\0';
  }
}
435 
readString()436 bool Reader::readString() {
437   Char c = '\0';
438   while (current_ != end_) {
439     c = getNextChar();
440     if (c == '\\')
441       getNextChar();
442     else if (c == '"')
443       break;
444   }
445   return c == '"';
446 }
447 
// Reads the members of an object whose opening token `token` has already been
// consumed, storing them in currentValue().
//
// Returns true when the matching tokenObjectEnd was reached. On any error the
// function returns the result of recoverFromError()/addErrorAndRecover(),
// which skip ahead looking for tokenObjectEnd.
bool Reader::readObject(Token& token) {
  Token tokenName;
  String name;
  Value init(objectValue);
  currentValue().swapPayload(init);
  currentValue().setOffsetStart(token.start_ - begin_);
  while (readToken(tokenName)) {
    // Skip comments in front of the member name.
    bool initialTokenOk = true;
    while (tokenName.type_ == tokenComment && initialTokenOk)
      initialTokenOk = readToken(tokenName);
    if (!initialTokenOk)
      break;
    // A closing token is accepted at this point only while `name` is empty,
    // i.e. before any member has been read or after a member whose key was
    // the empty string.
    if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object
      return true;
    name.clear();
    if (tokenName.type_ == tokenString) {
      if (!decodeString(tokenName, name))
        return recoverFromError(tokenObjectEnd);
    } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
      // Numeric key: decode the number and use its string form as the name.
      Value numberName;
      if (!decodeNumber(tokenName, numberName))
        return recoverFromError(tokenObjectEnd);
      name = numberName.asString();
    } else {
      break;
    }

    Token colon;
    if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
      return addErrorAndRecover("Missing ':' after object member name", colon,
                                tokenObjectEnd);
    }
    // Make the member the current node while its value is being read.
    Value& value = currentValue()[name];
    nodes_.push(&value);
    bool ok = readValue();
    nodes_.pop();
    if (!ok) // error already set
      return recoverFromError(tokenObjectEnd);

    Token comma;
    if (!readToken(comma) ||
        (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
         comma.type_ != tokenComment)) {
      return addErrorAndRecover("Missing ',' or '}' in object declaration",
                                comma, tokenObjectEnd);
    }
    // Skip comments that follow the member value.
    bool finalizeTokenOk = true;
    while (comma.type_ == tokenComment && finalizeTokenOk)
      finalizeTokenOk = readToken(comma);
    if (comma.type_ == tokenObjectEnd)
      return true;
  }
  return addErrorAndRecover("Missing '}' or object member name", tokenName,
                            tokenObjectEnd);
}
503 
readArray(Token & token)504 bool Reader::readArray(Token& token) {
505   Value init(arrayValue);
506   currentValue().swapPayload(init);
507   currentValue().setOffsetStart(token.start_ - begin_);
508   skipSpaces();
509   if (current_ != end_ && *current_ == ']') // empty array
510   {
511     Token endArray;
512     readToken(endArray);
513     return true;
514   }
515   int index = 0;
516   for (;;) {
517     Value& value = currentValue()[index++];
518     nodes_.push(&value);
519     bool ok = readValue();
520     nodes_.pop();
521     if (!ok) // error already set
522       return recoverFromError(tokenArrayEnd);
523 
524     Token currentToken;
525     // Accept Comment after last item in the array.
526     ok = readToken(currentToken);
527     while (currentToken.type_ == tokenComment && ok) {
528       ok = readToken(currentToken);
529     }
530     bool badTokenType = (currentToken.type_ != tokenArraySeparator &&
531                          currentToken.type_ != tokenArrayEnd);
532     if (!ok || badTokenType) {
533       return addErrorAndRecover("Missing ',' or ']' in array declaration",
534                                 currentToken, tokenArrayEnd);
535     }
536     if (currentToken.type_ == tokenArrayEnd)
537       break;
538   }
539   return true;
540 }
541 
decodeNumber(Token & token)542 bool Reader::decodeNumber(Token& token) {
543   Value decoded;
544   if (!decodeNumber(token, decoded))
545     return false;
546   currentValue().swapPayload(decoded);
547   currentValue().setOffsetStart(token.start_ - begin_);
548   currentValue().setOffsetLimit(token.end_ - begin_);
549   return true;
550 }
551 
decodeNumber(Token & token,Value & decoded)552 bool Reader::decodeNumber(Token& token, Value& decoded) {
553   // Attempts to parse the number as an integer. If the number is
554   // larger than the maximum supported value of an integer then
555   // we decode the number as a double.
556   Location current = token.start_;
557   bool isNegative = *current == '-';
558   if (isNegative)
559     ++current;
560   // TODO: Help the compiler do the div and mod at compile time or get rid of
561   // them.
562   Value::LargestUInt maxIntegerValue =
563       isNegative ? Value::LargestUInt(Value::maxLargestInt) + 1
564                  : Value::maxLargestUInt;
565   Value::LargestUInt threshold = maxIntegerValue / 10;
566   Value::LargestUInt value = 0;
567   while (current < token.end_) {
568     Char c = *current++;
569     if (c < '0' || c > '9')
570       return decodeDouble(token, decoded);
571     Value::UInt digit(static_cast<Value::UInt>(c - '0'));
572     if (value >= threshold) {
573       // We've hit or exceeded the max value divided by 10 (rounded down). If
574       // a) we've only just touched the limit, b) this is the last digit, and
575       // c) it's small enough to fit in that rounding delta, we're okay.
576       // Otherwise treat this number as a double to avoid overflow.
577       if (value > threshold || current != token.end_ ||
578           digit > maxIntegerValue % 10) {
579         return decodeDouble(token, decoded);
580       }
581     }
582     value = value * 10 + digit;
583   }
584   if (isNegative && value == maxIntegerValue)
585     decoded = Value::minLargestInt;
586   else if (isNegative)
587     decoded = -Value::LargestInt(value);
588   else if (value <= Value::LargestUInt(Value::maxInt))
589     decoded = Value::LargestInt(value);
590   else
591     decoded = value;
592   return true;
593 }
594 
decodeDouble(Token & token)595 bool Reader::decodeDouble(Token& token) {
596   Value decoded;
597   if (!decodeDouble(token, decoded))
598     return false;
599   currentValue().swapPayload(decoded);
600   currentValue().setOffsetStart(token.start_ - begin_);
601   currentValue().setOffsetLimit(token.end_ - begin_);
602   return true;
603 }
604 
decodeDouble(Token & token,Value & decoded)605 bool Reader::decodeDouble(Token& token, Value& decoded) {
606   double value = 0;
607   String buffer(token.start_, token.end_);
608   IStringStream is(buffer);
609   if (!(is >> value))
610     return addError(
611         "'" + String(token.start_, token.end_) + "' is not a number.", token);
612   decoded = value;
613   return true;
614 }
615 
decodeString(Token & token)616 bool Reader::decodeString(Token& token) {
617   String decoded_string;
618   if (!decodeString(token, decoded_string))
619     return false;
620   Value decoded(decoded_string);
621   currentValue().swapPayload(decoded);
622   currentValue().setOffsetStart(token.start_ - begin_);
623   currentValue().setOffsetLimit(token.end_ - begin_);
624   return true;
625 }
626 
decodeString(Token & token,String & decoded)627 bool Reader::decodeString(Token& token, String& decoded) {
628   decoded.reserve(static_cast<size_t>(token.end_ - token.start_ - 2));
629   Location current = token.start_ + 1; // skip '"'
630   Location end = token.end_ - 1;       // do not include '"'
631   while (current != end) {
632     Char c = *current++;
633     if (c == '"')
634       break;
635     if (c == '\\') {
636       if (current == end)
637         return addError("Empty escape sequence in string", token, current);
638       Char escape = *current++;
639       switch (escape) {
640       case '"':
641         decoded += '"';
642         break;
643       case '/':
644         decoded += '/';
645         break;
646       case '\\':
647         decoded += '\\';
648         break;
649       case 'b':
650         decoded += '\b';
651         break;
652       case 'f':
653         decoded += '\f';
654         break;
655       case 'n':
656         decoded += '\n';
657         break;
658       case 'r':
659         decoded += '\r';
660         break;
661       case 't':
662         decoded += '\t';
663         break;
664       case 'u': {
665         unsigned int unicode;
666         if (!decodeUnicodeCodePoint(token, current, end, unicode))
667           return false;
668         decoded += codePointToUTF8(unicode);
669       } break;
670       default:
671         return addError("Bad escape sequence in string", token, current);
672       }
673     } else {
674       decoded += c;
675     }
676   }
677   return true;
678 }
679 
decodeUnicodeCodePoint(Token & token,Location & current,Location end,unsigned int & unicode)680 bool Reader::decodeUnicodeCodePoint(Token& token, Location& current,
681                                     Location end, unsigned int& unicode) {
682 
683   if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
684     return false;
685   if (unicode >= 0xD800 && unicode <= 0xDBFF) {
686     // surrogate pairs
687     if (end - current < 6)
688       return addError(
689           "additional six characters expected to parse unicode surrogate pair.",
690           token, current);
691     if (*(current++) == '\\' && *(current++) == 'u') {
692       unsigned int surrogatePair;
693       if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
694         unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
695       } else
696         return false;
697     } else
698       return addError("expecting another \\u token to begin the second half of "
699                       "a unicode surrogate pair",
700                       token, current);
701   }
702   return true;
703 }
704 
decodeUnicodeEscapeSequence(Token & token,Location & current,Location end,unsigned int & ret_unicode)705 bool Reader::decodeUnicodeEscapeSequence(Token& token, Location& current,
706                                          Location end,
707                                          unsigned int& ret_unicode) {
708   if (end - current < 4)
709     return addError(
710         "Bad unicode escape sequence in string: four digits expected.", token,
711         current);
712   int unicode = 0;
713   for (int index = 0; index < 4; ++index) {
714     Char c = *current++;
715     unicode *= 16;
716     if (c >= '0' && c <= '9')
717       unicode += c - '0';
718     else if (c >= 'a' && c <= 'f')
719       unicode += c - 'a' + 10;
720     else if (c >= 'A' && c <= 'F')
721       unicode += c - 'A' + 10;
722     else
723       return addError(
724           "Bad unicode escape sequence in string: hexadecimal digit expected.",
725           token, current);
726   }
727   ret_unicode = static_cast<unsigned int>(unicode);
728   return true;
729 }
730 
addError(const String & message,Token & token,Location extra)731 bool Reader::addError(const String& message, Token& token, Location extra) {
732   ErrorInfo info;
733   info.token_ = token;
734   info.message_ = message;
735   info.extra_ = extra;
736   errors_.push_back(info);
737   return false;
738 }
739 
recoverFromError(TokenType skipUntilToken)740 bool Reader::recoverFromError(TokenType skipUntilToken) {
741   size_t const errorCount = errors_.size();
742   Token skip;
743   for (;;) {
744     if (!readToken(skip))
745       errors_.resize(errorCount); // discard errors caused by recovery
746     if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
747       break;
748   }
749   errors_.resize(errorCount);
750   return false;
751 }
752 
addErrorAndRecover(const String & message,Token & token,TokenType skipUntilToken)753 bool Reader::addErrorAndRecover(const String& message, Token& token,
754                                 TokenType skipUntilToken) {
755   addError(message, token);
756   return recoverFromError(skipUntilToken);
757 }
758 
currentValue()759 Value& Reader::currentValue() { return *(nodes_.top()); }
760 
getNextChar()761 Reader::Char Reader::getNextChar() {
762   if (current_ == end_)
763     return 0;
764   return *current_++;
765 }
766 
getLocationLineAndColumn(Location location,int & line,int & column) const767 void Reader::getLocationLineAndColumn(Location location, int& line,
768                                       int& column) const {
769   Location current = begin_;
770   Location lastLineStart = current;
771   line = 0;
772   while (current < location && current != end_) {
773     Char c = *current++;
774     if (c == '\r') {
775       if (*current == '\n')
776         ++current;
777       lastLineStart = current;
778       ++line;
779     } else if (c == '\n') {
780       lastLineStart = current;
781       ++line;
782     }
783   }
784   // column & line start at 1
785   column = int(location - lastLineStart) + 1;
786   ++line;
787 }
788 
getLocationLineAndColumn(Location location) const789 String Reader::getLocationLineAndColumn(Location location) const {
790   int line, column;
791   getLocationLineAndColumn(location, line, column);
792   char buffer[18 + 16 + 16 + 1];
793   jsoncpp_snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
794   return buffer;
795 }
796 
797 // Deprecated. Preserved for backward compatibility
getFormatedErrorMessages() const798 String Reader::getFormatedErrorMessages() const {
799   return getFormattedErrorMessages();
800 }
801 
getFormattedErrorMessages() const802 String Reader::getFormattedErrorMessages() const {
803   String formattedMessage;
804   for (Errors::const_iterator itError = errors_.begin();
805        itError != errors_.end(); ++itError) {
806     const ErrorInfo& error = *itError;
807     formattedMessage +=
808         "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
809     formattedMessage += "  " + error.message_ + "\n";
810     if (error.extra_)
811       formattedMessage +=
812           "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
813   }
814   return formattedMessage;
815 }
816 
getStructuredErrors() const817 std::vector<Reader::StructuredError> Reader::getStructuredErrors() const {
818   std::vector<Reader::StructuredError> allErrors;
819   for (Errors::const_iterator itError = errors_.begin();
820        itError != errors_.end(); ++itError) {
821     const ErrorInfo& error = *itError;
822     Reader::StructuredError structured;
823     structured.offset_start = error.token_.start_ - begin_;
824     structured.offset_limit = error.token_.end_ - begin_;
825     structured.message = error.message_;
826     allErrors.push_back(structured);
827   }
828   return allErrors;
829 }
830 
pushError(const Value & value,const String & message)831 bool Reader::pushError(const Value& value, const String& message) {
832   ptrdiff_t const length = end_ - begin_;
833   if (value.getOffsetStart() > length || value.getOffsetLimit() > length)
834     return false;
835   Token token;
836   token.type_ = tokenError;
837   token.start_ = begin_ + value.getOffsetStart();
838   token.end_ = begin_ + value.getOffsetLimit();
839   ErrorInfo info;
840   info.token_ = token;
841   info.message_ = message;
842   info.extra_ = JSONCPP_NULL;
843   errors_.push_back(info);
844   return true;
845 }
846 
pushError(const Value & value,const String & message,const Value & extra)847 bool Reader::pushError(const Value& value, const String& message,
848                        const Value& extra) {
849   ptrdiff_t const length = end_ - begin_;
850   if (value.getOffsetStart() > length || value.getOffsetLimit() > length ||
851       extra.getOffsetLimit() > length)
852     return false;
853   Token token;
854   token.type_ = tokenError;
855   token.start_ = begin_ + value.getOffsetStart();
856   token.end_ = begin_ + value.getOffsetLimit();
857   ErrorInfo info;
858   info.token_ = token;
859   info.message_ = message;
860   info.extra_ = begin_ + extra.getOffsetStart();
861   errors_.push_back(info);
862   return true;
863 }
864 
good() const865 bool Reader::good() const { return errors_.empty(); }
866 
// Originally copied from the Features class (now deprecated), used internally
// for features implementation.
//
// Plain bundle of parser options; an instance is passed to the OurReader
// constructor. The class declares no constructor, so the members hold
// meaningful values only after value-initialization (as done by all()) or
// explicit assignment.
class OurFeatures {
public:
  // Returns a value-initialized instance.
  static OurFeatures all();
  bool allowComments_;
  bool allowTrailingCommas_;
  bool strictRoot_;
  bool allowDroppedNullPlaceholders_;
  bool allowNumericKeys_;
  bool allowSingleQuotes_;
  bool failIfExtra_;
  bool rejectDupKeys_;
  bool allowSpecialFloats_;
  bool skipBom_;
  // NOTE(review): presumably the nesting limit used by OurReader, mirroring
  // stackLimit_g for Reader - confirm against OurReader::readValue().
  size_t stackLimit_;
}; // OurFeatures
884 
all()885 OurFeatures OurFeatures::all() { return OurFeatures(); }
886 
887 // Implementation of class Reader
888 // ////////////////////////////////
889 
890 // Originally copied from the Reader class (now deprecated), used internally
891 // for implementing JSON reading.
// Parses the character range handed to parse() into a Value tree, driven by
// an OurFeatures configuration, and accumulates the errors met on the way.
class OurReader {
public:
  typedef char Char;
  // A position inside the document being parsed.
  typedef const Char* Location;
  // Error record returned by getStructuredErrors().
  struct StructuredError {
    ptrdiff_t offset_start;
    ptrdiff_t offset_limit;
    String message;
  };

  // Stores a copy of `features`; all cursors start out null.
  JSONCPP_OP_EXPLICIT OurReader(OurFeatures const& features);
  // Parses [beginDoc, endDoc) into `root`. Returns false when an error was
  // recorded; comment collection is forced off if comments are not allowed.
  bool parse(const char* beginDoc, const char* endDoc, Value& root,
             bool collectComments = true);
  // One "* Line L, Column C" entry per recorded error, in recording order.
  String getFormattedErrorMessages() const;
  std::vector<StructuredError> getStructuredErrors() const;

private:
  OurReader(OurReader const&);      // no impl
  void operator=(OurReader const&); // no impl

  // Kinds of lexemes produced by readToken().
  enum TokenType {
    tokenEndOfStream = 0,
    tokenObjectBegin,
    tokenObjectEnd,
    tokenArrayBegin,
    tokenArrayEnd,
    tokenString,
    tokenNumber,
    tokenTrue,
    tokenFalse,
    tokenNull,
    tokenNaN,
    tokenPosInf,
    tokenNegInf,
    tokenArraySeparator,
    tokenMemberSeparator,
    tokenComment,
    tokenError
  };

  // A lexeme: its kind plus the half-open range [start_, end_) it covers.
  class Token {
  public:
    TokenType type_;
    Location start_;
    Location end_;
  };

  // One recorded error: the offending token, the message, and an optional
  // second location (null when there is none) used for "See ... for detail".
  class ErrorInfo {
  public:
    Token token_;
    String message_;
    Location extra_;
  };

  typedef std::deque<ErrorInfo> Errors;

  // --- Lexing -------------------------------------------------------------
  bool readToken(Token& token);
  void skipSpaces();
  void skipBom(bool skipBom);
  bool match(const Char* pattern, int patternLength);
  bool readComment();
  bool readCStyleComment(bool* containsNewLineResult);
  bool readCppStyleComment();
  bool readString();
  bool readStringSingleQuote();
  bool readNumber(bool checkInf);
  // --- Parsing ------------------------------------------------------------
  bool readValue();
  bool readObject(Token& token);
  bool readArray(Token& token);
  // --- Decoding: the one-argument forms store into currentValue() ---------
  bool decodeNumber(Token& token);
  bool decodeNumber(Token& token, Value& decoded);
  bool decodeString(Token& token);
  bool decodeString(Token& token, String& decoded);
  bool decodeDouble(Token& token);
  bool decodeDouble(Token& token, Value& decoded);
  bool decodeUnicodeCodePoint(Token& token, Location& current, Location end,
                              unsigned int& unicode);
  bool decodeUnicodeEscapeSequence(Token& token, Location& current,
                                   Location end, unsigned int& unicode);
  // --- Error handling: each of these returns false ------------------------
  bool addError(const String& message, Token& token,
                Location extra = JSONCPP_NULL);
  bool recoverFromError(TokenType skipUntilToken);
  bool addErrorAndRecover(const String& message, Token& token,
                          TokenType skipUntilToken);
  void skipUntilSpace();
  // The Value on top of nodes_, i.e. the one currently being filled in.
  Value& currentValue();
  // Next character, or 0 once the end of the document is reached.
  Char getNextChar();
  void getLocationLineAndColumn(Location location, int& line,
                                int& column) const;
  String getLocationLineAndColumn(Location location) const;
  void addComment(Location begin, Location end, CommentPlacement placement);
  // Reads one token, skipping comment tokens when comments are allowed.
  void skipCommentTokens(Token& token);

  static String normalizeEOL(Location begin, Location end);
  static bool containsNewLine(Location begin, Location end);

  typedef std::stack<Value*> Nodes;

  Nodes nodes_;               // values being filled; top is the current one
  Errors errors_;             // errors recorded since the last parse() call
  String document_;
  Location begin_;            // start of the document (after any skipped BOM)
  Location end_;              // one past the last character of the document
  Location current_;          // read cursor
  Location lastValueEnd_;     // cursor position right after the last value read
  Value* lastValue_;          // last value read; target of same-line comments
  bool lastValueHasAComment_; // lastValue_ already received a same-line comment
  String commentsBefore_;     // comments waiting to be attached to a value

  OurFeatures const features_;
  bool collectComments_;      // whether comments are attached to values
}; // OurReader
1004 
// Complete copy of the Reader implementation, for OurReader
1006 
containsNewLine(OurReader::Location begin,OurReader::Location end)1007 bool OurReader::containsNewLine(OurReader::Location begin,
1008                                 OurReader::Location end) {
1009   for (; begin < end; ++begin)
1010     if (*begin == '\n' || *begin == '\r')
1011       return true;
1012   return false;
1013 }
1014 
OurReader(OurFeatures const & features)1015 OurReader::OurReader(OurFeatures const& features)
1016     : errors_(), document_(), begin_(JSONCPP_NULL), end_(JSONCPP_NULL),
1017       current_(JSONCPP_NULL), lastValueEnd_(JSONCPP_NULL),
1018       lastValue_(JSONCPP_NULL), lastValueHasAComment_(false), commentsBefore_(),
1019       features_(features), collectComments_(false) {}
1020 
parse(const char * beginDoc,const char * endDoc,Value & root,bool collectComments)1021 bool OurReader::parse(const char* beginDoc, const char* endDoc, Value& root,
1022                       bool collectComments) {
1023   if (!features_.allowComments_) {
1024     collectComments = false;
1025   }
1026 
1027   begin_ = beginDoc;
1028   end_ = endDoc;
1029   collectComments_ = collectComments;
1030   current_ = begin_;
1031   lastValueEnd_ = JSONCPP_NULL;
1032   lastValue_ = JSONCPP_NULL;
1033   commentsBefore_.clear();
1034   errors_.clear();
1035   while (!nodes_.empty())
1036     nodes_.pop();
1037   nodes_.push(&root);
1038 
1039   // skip byte order mark if it exists at the beginning of the UTF-8 text.
1040   skipBom(features_.skipBom_);
1041   bool successful = readValue();
1042   nodes_.pop();
1043   Token token;
1044   skipCommentTokens(token);
1045   if (features_.failIfExtra_ && (token.type_ != tokenEndOfStream)) {
1046     addError("Extra non-whitespace after JSON value.", token);
1047     return false;
1048   }
1049   if (collectComments_ && !commentsBefore_.empty())
1050     root.setComment(commentsBefore_, commentAfter);
1051   if (features_.strictRoot_) {
1052     if (!root.isArray() && !root.isObject()) {
1053       // Set error location to start of doc, ideally should be first token found
1054       // in doc
1055       token.type_ = tokenError;
1056       token.start_ = beginDoc;
1057       token.end_ = endDoc;
1058       addError(
1059           "A valid JSON document must be either an array or an object value.",
1060           token);
1061       return false;
1062     }
1063   }
1064   return successful;
1065 }
1066 
readValue()1067 bool OurReader::readValue() {
1068   //  To preserve the old behaviour we cast size_t to int.
1069   if (nodes_.size() > features_.stackLimit_)
1070     throwRuntimeError("Exceeded stackLimit in readValue().");
1071   Token token;
1072   skipCommentTokens(token);
1073   bool successful = true;
1074 
1075   if (collectComments_ && !commentsBefore_.empty()) {
1076     currentValue().setComment(commentsBefore_, commentBefore);
1077     commentsBefore_.clear();
1078   }
1079 
1080   switch (token.type_) {
1081   case tokenObjectBegin:
1082     successful = readObject(token);
1083     currentValue().setOffsetLimit(current_ - begin_);
1084     break;
1085   case tokenArrayBegin:
1086     successful = readArray(token);
1087     currentValue().setOffsetLimit(current_ - begin_);
1088     break;
1089   case tokenNumber:
1090     successful = decodeNumber(token);
1091     break;
1092   case tokenString:
1093     successful = decodeString(token);
1094     break;
1095   case tokenTrue: {
1096     Value v(true);
1097     currentValue().swapPayload(v);
1098     currentValue().setOffsetStart(token.start_ - begin_);
1099     currentValue().setOffsetLimit(token.end_ - begin_);
1100   } break;
1101   case tokenFalse: {
1102     Value v(false);
1103     currentValue().swapPayload(v);
1104     currentValue().setOffsetStart(token.start_ - begin_);
1105     currentValue().setOffsetLimit(token.end_ - begin_);
1106   } break;
1107   case tokenNull: {
1108     Value v;
1109     currentValue().swapPayload(v);
1110     currentValue().setOffsetStart(token.start_ - begin_);
1111     currentValue().setOffsetLimit(token.end_ - begin_);
1112   } break;
1113   case tokenNaN: {
1114     Value v(std::numeric_limits<double>::quiet_NaN());
1115     currentValue().swapPayload(v);
1116     currentValue().setOffsetStart(token.start_ - begin_);
1117     currentValue().setOffsetLimit(token.end_ - begin_);
1118   } break;
1119   case tokenPosInf: {
1120     Value v(std::numeric_limits<double>::infinity());
1121     currentValue().swapPayload(v);
1122     currentValue().setOffsetStart(token.start_ - begin_);
1123     currentValue().setOffsetLimit(token.end_ - begin_);
1124   } break;
1125   case tokenNegInf: {
1126     Value v(-std::numeric_limits<double>::infinity());
1127     currentValue().swapPayload(v);
1128     currentValue().setOffsetStart(token.start_ - begin_);
1129     currentValue().setOffsetLimit(token.end_ - begin_);
1130   } break;
1131   case tokenArraySeparator:
1132   case tokenObjectEnd:
1133   case tokenArrayEnd:
1134     if (features_.allowDroppedNullPlaceholders_) {
1135       // "Un-read" the current token and mark the current value as a null
1136       // token.
1137       current_--;
1138       Value v;
1139       currentValue().swapPayload(v);
1140       currentValue().setOffsetStart(current_ - begin_ - 1);
1141       currentValue().setOffsetLimit(current_ - begin_);
1142       break;
1143     } // else, fall through ...
1144   default:
1145     currentValue().setOffsetStart(token.start_ - begin_);
1146     currentValue().setOffsetLimit(token.end_ - begin_);
1147     return addError("Syntax error: value, object or array expected.", token);
1148   }
1149 
1150   if (collectComments_) {
1151     lastValueEnd_ = current_;
1152     lastValueHasAComment_ = false;
1153     lastValue_ = &currentValue();
1154   }
1155 
1156   return successful;
1157 }
1158 
skipCommentTokens(Token & token)1159 void OurReader::skipCommentTokens(Token& token) {
1160   if (features_.allowComments_) {
1161     do {
1162       readToken(token);
1163     } while (token.type_ == tokenComment);
1164   } else {
1165     readToken(token);
1166   }
1167 }
1168 
readToken(Token & token)1169 bool OurReader::readToken(Token& token) {
1170   skipSpaces();
1171   token.start_ = current_;
1172   Char c = getNextChar();
1173   bool ok = true;
1174   switch (c) {
1175   case '{':
1176     token.type_ = tokenObjectBegin;
1177     break;
1178   case '}':
1179     token.type_ = tokenObjectEnd;
1180     break;
1181   case '[':
1182     token.type_ = tokenArrayBegin;
1183     break;
1184   case ']':
1185     token.type_ = tokenArrayEnd;
1186     break;
1187   case '"':
1188     token.type_ = tokenString;
1189     ok = readString();
1190     break;
1191   case '\'':
1192     if (features_.allowSingleQuotes_) {
1193       token.type_ = tokenString;
1194       ok = readStringSingleQuote();
1195       break;
1196     } // else fall through
1197   case '/':
1198     token.type_ = tokenComment;
1199     ok = readComment();
1200     break;
1201   case '0':
1202   case '1':
1203   case '2':
1204   case '3':
1205   case '4':
1206   case '5':
1207   case '6':
1208   case '7':
1209   case '8':
1210   case '9':
1211     token.type_ = tokenNumber;
1212     readNumber(false);
1213     break;
1214   case '-':
1215     if (readNumber(true)) {
1216       token.type_ = tokenNumber;
1217     } else {
1218       token.type_ = tokenNegInf;
1219       ok = features_.allowSpecialFloats_ && match("nfinity", 7);
1220     }
1221     break;
1222   case '+':
1223     if (readNumber(true)) {
1224       token.type_ = tokenNumber;
1225     } else {
1226       token.type_ = tokenPosInf;
1227       ok = features_.allowSpecialFloats_ && match("nfinity", 7);
1228     }
1229     break;
1230   case 't':
1231     token.type_ = tokenTrue;
1232     ok = match("rue", 3);
1233     break;
1234   case 'f':
1235     token.type_ = tokenFalse;
1236     ok = match("alse", 4);
1237     break;
1238   case 'n':
1239     token.type_ = tokenNull;
1240     ok = match("ull", 3);
1241     break;
1242   case 'N':
1243     if (features_.allowSpecialFloats_) {
1244       token.type_ = tokenNaN;
1245       ok = match("aN", 2);
1246     } else {
1247       ok = false;
1248     }
1249     break;
1250   case 'I':
1251     if (features_.allowSpecialFloats_) {
1252       token.type_ = tokenPosInf;
1253       ok = match("nfinity", 7);
1254     } else {
1255       ok = false;
1256     }
1257     break;
1258   case ',':
1259     token.type_ = tokenArraySeparator;
1260     break;
1261   case ':':
1262     token.type_ = tokenMemberSeparator;
1263     break;
1264   case 0:
1265     token.type_ = tokenEndOfStream;
1266     break;
1267   default:
1268     ok = false;
1269     break;
1270   }
1271   if (!ok)
1272     token.type_ = tokenError;
1273   token.end_ = current_;
1274   return ok;
1275 }
1276 
skipSpaces()1277 void OurReader::skipSpaces() {
1278   while (current_ != end_) {
1279     Char c = *current_;
1280     if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
1281       ++current_;
1282     else
1283       break;
1284   }
1285 }
1286 
skipBom(bool skipBom)1287 void OurReader::skipBom(bool skipBom) {
1288   // The default behavior is to skip BOM.
1289   if (skipBom) {
1290     if (strncmp(begin_, "\xEF\xBB\xBF", 3) == 0) {
1291       begin_ += 3;
1292       current_ = begin_;
1293     }
1294   }
1295 }
1296 
match(const Char * pattern,int patternLength)1297 bool OurReader::match(const Char* pattern, int patternLength) {
1298   if (end_ - current_ < patternLength)
1299     return false;
1300   int index = patternLength;
1301   while (index--)
1302     if (current_[index] != pattern[index])
1303       return false;
1304   current_ += patternLength;
1305   return true;
1306 }
1307 
readComment()1308 bool OurReader::readComment() {
1309   const Location commentBegin = current_ - 1;
1310   const Char c = getNextChar();
1311   bool successful = false;
1312   bool cStyleWithEmbeddedNewline = false;
1313 
1314   const bool isCStyleComment = (c == '*');
1315   const bool isCppStyleComment = (c == '/');
1316   if (isCStyleComment) {
1317     successful = readCStyleComment(&cStyleWithEmbeddedNewline);
1318   } else if (isCppStyleComment) {
1319     successful = readCppStyleComment();
1320   }
1321 
1322   if (!successful)
1323     return false;
1324 
1325   if (collectComments_) {
1326     CommentPlacement placement = commentBefore;
1327 
1328     if (!lastValueHasAComment_) {
1329       if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
1330         if (isCppStyleComment || !cStyleWithEmbeddedNewline) {
1331           placement = commentAfterOnSameLine;
1332           lastValueHasAComment_ = true;
1333         }
1334       }
1335     }
1336 
1337     addComment(commentBegin, current_, placement);
1338   }
1339   return true;
1340 }
1341 
normalizeEOL(OurReader::Location begin,OurReader::Location end)1342 String OurReader::normalizeEOL(OurReader::Location begin,
1343                                OurReader::Location end) {
1344   String normalized;
1345   normalized.reserve(static_cast<size_t>(end - begin));
1346   OurReader::Location current = begin;
1347   while (current != end) {
1348     char c = *current++;
1349     if (c == '\r') {
1350       if (current != end && *current == '\n')
1351         // convert dos EOL
1352         ++current;
1353       // convert Mac EOL
1354       normalized += '\n';
1355     } else {
1356       normalized += c;
1357     }
1358   }
1359   return normalized;
1360 }
1361 
addComment(Location begin,Location end,CommentPlacement placement)1362 void OurReader::addComment(Location begin, Location end,
1363                            CommentPlacement placement) {
1364   assert(collectComments_);
1365   const String& normalized = normalizeEOL(begin, end);
1366   if (placement == commentAfterOnSameLine) {
1367     assert(lastValue_ != JSONCPP_NULL);
1368     lastValue_->setComment(normalized, placement);
1369   } else {
1370     commentsBefore_ += normalized;
1371   }
1372 }
1373 
readCStyleComment(bool * containsNewLineResult)1374 bool OurReader::readCStyleComment(bool* containsNewLineResult) {
1375   *containsNewLineResult = false;
1376 
1377   while ((current_ + 1) < end_) {
1378     Char c = getNextChar();
1379     if (c == '*' && *current_ == '/')
1380       break;
1381     if (c == '\n')
1382       *containsNewLineResult = true;
1383   }
1384 
1385   return getNextChar() == '/';
1386 }
1387 
readCppStyleComment()1388 bool OurReader::readCppStyleComment() {
1389   while (current_ != end_) {
1390     Char c = getNextChar();
1391     if (c == '\n')
1392       break;
1393     if (c == '\r') {
1394       // Consume DOS EOL. It will be normalized in addComment.
1395       if (current_ != end_ && *current_ == '\n')
1396         getNextChar();
1397       // Break on Moc OS 9 EOL.
1398       break;
1399     }
1400   }
1401   return true;
1402 }
1403 
readNumber(bool checkInf)1404 bool OurReader::readNumber(bool checkInf) {
1405   Location p = current_;
1406   if (checkInf && p != end_ && *p == 'I') {
1407     current_ = ++p;
1408     return false;
1409   }
1410   char c = '0'; // stopgap for already consumed character
1411   // integral part
1412   while (c >= '0' && c <= '9')
1413     c = (current_ = p) < end_ ? *p++ : '\0';
1414   // fractional part
1415   if (c == '.') {
1416     c = (current_ = p) < end_ ? *p++ : '\0';
1417     while (c >= '0' && c <= '9')
1418       c = (current_ = p) < end_ ? *p++ : '\0';
1419   }
1420   // exponential part
1421   if (c == 'e' || c == 'E') {
1422     c = (current_ = p) < end_ ? *p++ : '\0';
1423     if (c == '+' || c == '-')
1424       c = (current_ = p) < end_ ? *p++ : '\0';
1425     while (c >= '0' && c <= '9')
1426       c = (current_ = p) < end_ ? *p++ : '\0';
1427   }
1428   return true;
1429 }
readString()1430 bool OurReader::readString() {
1431   Char c = 0;
1432   while (current_ != end_) {
1433     c = getNextChar();
1434     if (c == '\\')
1435       getNextChar();
1436     else if (c == '"')
1437       break;
1438   }
1439   return c == '"';
1440 }
1441 
readStringSingleQuote()1442 bool OurReader::readStringSingleQuote() {
1443   Char c = 0;
1444   while (current_ != end_) {
1445     c = getNextChar();
1446     if (c == '\\')
1447       getNextChar();
1448     else if (c == '\'')
1449       break;
1450   }
1451   return c == '\'';
1452 }
1453 
readObject(Token & token)1454 bool OurReader::readObject(Token& token) {
1455   Token tokenName;
1456   String name;
1457   Value init(objectValue);
1458   currentValue().swapPayload(init);
1459   currentValue().setOffsetStart(token.start_ - begin_);
1460   while (readToken(tokenName)) {
1461     bool initialTokenOk = true;
1462     while (tokenName.type_ == tokenComment && initialTokenOk)
1463       initialTokenOk = readToken(tokenName);
1464     if (!initialTokenOk)
1465       break;
1466     if (tokenName.type_ == tokenObjectEnd &&
1467         (name.empty() ||
1468          features_.allowTrailingCommas_)) // empty object or trailing comma
1469       return true;
1470     name.clear();
1471     if (tokenName.type_ == tokenString) {
1472       if (!decodeString(tokenName, name))
1473         return recoverFromError(tokenObjectEnd);
1474     } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
1475       Value numberName;
1476       if (!decodeNumber(tokenName, numberName))
1477         return recoverFromError(tokenObjectEnd);
1478       name = numberName.asString();
1479     } else {
1480       break;
1481     }
1482     if (name.length() >= (1U << 30))
1483       throwRuntimeError("keylength >= 2^30");
1484     if (features_.rejectDupKeys_ && currentValue().isMember(name)) {
1485       String msg = "Duplicate key: '" + name + "'";
1486       return addErrorAndRecover(msg, tokenName, tokenObjectEnd);
1487     }
1488 
1489     Token colon;
1490     if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
1491       return addErrorAndRecover("Missing ':' after object member name", colon,
1492                                 tokenObjectEnd);
1493     }
1494     Value& value = currentValue()[name];
1495     nodes_.push(&value);
1496     bool ok = readValue();
1497     nodes_.pop();
1498     if (!ok) // error already set
1499       return recoverFromError(tokenObjectEnd);
1500 
1501     Token comma;
1502     if (!readToken(comma) ||
1503         (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
1504          comma.type_ != tokenComment)) {
1505       return addErrorAndRecover("Missing ',' or '}' in object declaration",
1506                                 comma, tokenObjectEnd);
1507     }
1508     bool finalizeTokenOk = true;
1509     while (comma.type_ == tokenComment && finalizeTokenOk)
1510       finalizeTokenOk = readToken(comma);
1511     if (comma.type_ == tokenObjectEnd)
1512       return true;
1513   }
1514   return addErrorAndRecover("Missing '}' or object member name", tokenName,
1515                             tokenObjectEnd);
1516 }
1517 
readArray(Token & token)1518 bool OurReader::readArray(Token& token) {
1519   Value init(arrayValue);
1520   currentValue().swapPayload(init);
1521   currentValue().setOffsetStart(token.start_ - begin_);
1522   int index = 0;
1523   for (;;) {
1524     skipSpaces();
1525     if (current_ != end_ && *current_ == ']' &&
1526         (index == 0 ||
1527          (features_.allowTrailingCommas_ &&
1528           !features_.allowDroppedNullPlaceholders_))) // empty array or trailing
1529                                                       // comma
1530     {
1531       Token endArray;
1532       readToken(endArray);
1533       return true;
1534     }
1535     Value& value = currentValue()[index++];
1536     nodes_.push(&value);
1537     bool ok = readValue();
1538     nodes_.pop();
1539     if (!ok) // error already set
1540       return recoverFromError(tokenArrayEnd);
1541 
1542     Token currentToken;
1543     // Accept Comment after last item in the array.
1544     ok = readToken(currentToken);
1545     while (currentToken.type_ == tokenComment && ok) {
1546       ok = readToken(currentToken);
1547     }
1548     bool badTokenType = (currentToken.type_ != tokenArraySeparator &&
1549                          currentToken.type_ != tokenArrayEnd);
1550     if (!ok || badTokenType) {
1551       return addErrorAndRecover("Missing ',' or ']' in array declaration",
1552                                 currentToken, tokenArrayEnd);
1553     }
1554     if (currentToken.type_ == tokenArrayEnd)
1555       break;
1556   }
1557   return true;
1558 }
1559 
decodeNumber(Token & token)1560 bool OurReader::decodeNumber(Token& token) {
1561   Value decoded;
1562   if (!decodeNumber(token, decoded))
1563     return false;
1564   currentValue().swapPayload(decoded);
1565   currentValue().setOffsetStart(token.start_ - begin_);
1566   currentValue().setOffsetLimit(token.end_ - begin_);
1567   return true;
1568 }
1569 
decodeNumber(Token & token,Value & decoded)1570 bool OurReader::decodeNumber(Token& token, Value& decoded) {
1571   // Attempts to parse the number as an integer. If the number is
1572   // larger than the maximum supported value of an integer then
1573   // we decode the number as a double.
1574   Location current = token.start_;
1575   const bool isNegative = *current == '-';
1576   if (isNegative) {
1577     ++current;
1578   }
1579 
1580   // We assume we can represent the largest and smallest integer types as
1581   // unsigned integers with separate sign. This is only true if they can fit
1582   // into an unsigned integer.
1583   JSONCPP_STATIC_ASSERT(LargestUInt(Value::maxLargestInt) <=
1584                             Value::maxLargestUInt,
1585                         "Int must be smaller than Uint");
1586   // We need to convert minLargestInt into a positive number. The easiest way
1587   // to do this conversion is to assume our "threshold" value of minLargestInt
1588   // divided by 10 can fit in maxLargestInt when absolute valued. This should
1589   // be a safe assumption.
1590   JSONCPP_STATIC_ASSERT(
1591       Value::minLargestInt <= -Value::maxLargestInt,
1592       "The absolute value of minLargestInt must ve greater than or"
1593       "equal to maxLargestInt");
1594 
1595   JSONCPP_STATIC_ASSERT(
1596       Value::minLargestInt / 10 >= -Value::maxLargestInt,
1597       "The absolute value of minLargestInt must be only 1 magnitude"
1598       "larger than maxLargestInt");
1599 
1600   static JSONCPP_CONST Value::LargestUInt positive_threshold =
1601       Value::maxLargestUInt / 10;
1602   static JSONCPP_CONST Value::UInt positive_last_digit =
1603       Value::maxLargestUInt % 10;
1604 
1605   // For the negative values, we have to be more careful. Since typically
1606   // -Value::minLargestInt will cause an overflow, we first divide by 10 and
1607   // then take the inverse. This assumes that minLargestInt is only a single
1608   // power of 10 different in magnitude, which we check above. For the last
1609   // digit, we take the modulus before negating for the same reason.
1610   static JSONCPP_CONST Value::LargestUInt negative_threshold =
1611       Value::LargestUInt(-(Value::minLargestInt / 10));
1612   static JSONCPP_CONST Value::UInt negative_last_digit =
1613       Value::UInt(-(Value::minLargestInt % 10));
1614 
1615   const Value::LargestUInt threshold =
1616       isNegative ? negative_threshold : positive_threshold;
1617   const Value::UInt max_last_digit =
1618       isNegative ? negative_last_digit : positive_last_digit;
1619 
1620   Value::LargestUInt value = 0;
1621   while (current < token.end_) {
1622     Char c = *current++;
1623     if (c < '0' || c > '9')
1624       return decodeDouble(token, decoded);
1625 
1626     const Value::UInt digit(static_cast<Value::UInt>(c - '0'));
1627     if (value >= threshold) {
1628       // We've hit or exceeded the max value divided by 10 (rounded down). If
1629       // a) we've only just touched the limit, meaing value == threshold,
1630       // b) this is the last digit, or
1631       // c) it's small enough to fit in that rounding delta, we're okay.
1632       // Otherwise treat this number as a double to avoid overflow.
1633       if (value > threshold || current != token.end_ ||
1634           digit > max_last_digit) {
1635         return decodeDouble(token, decoded);
1636       }
1637     }
1638     value = value * 10 + digit;
1639   }
1640 
1641   if (isNegative) {
1642     // We use the same magnitude assumption here, just in case.
1643     const Value::UInt last_digit = static_cast<Value::UInt>(value % 10);
1644     decoded = -Value::LargestInt(value / 10) * 10 - last_digit;
1645   } else if (value <= Value::LargestUInt(Value::maxLargestInt)) {
1646     decoded = Value::LargestInt(value);
1647   } else {
1648     decoded = value;
1649   }
1650 
1651   return true;
1652 }
1653 
decodeDouble(Token & token)1654 bool OurReader::decodeDouble(Token& token) {
1655   Value decoded;
1656   if (!decodeDouble(token, decoded))
1657     return false;
1658   currentValue().swapPayload(decoded);
1659   currentValue().setOffsetStart(token.start_ - begin_);
1660   currentValue().setOffsetLimit(token.end_ - begin_);
1661   return true;
1662 }
1663 
decodeDouble(Token & token,Value & decoded)1664 bool OurReader::decodeDouble(Token& token, Value& decoded) {
1665   double value = 0;
1666   const String buffer(token.start_, token.end_);
1667   IStringStream is(buffer);
1668   if (!(is >> value)) {
1669     // the value could be lower than numeric_limits<double>::min(), in this situtation we should return the value with the gurantee
1670     // of conversion which has been performed and no occurances of range error.
1671     if ((value > 0 && value < std::numeric_limits<double>::min()) || (value < 0 && value > -std::numeric_limits<double>::min())) {
1672       decoded = value;
1673       return true;
1674     }
1675     return addError(
1676         "'" + String(token.start_, token.end_) + "' is not a number.", token);
1677   }
1678   decoded = value;
1679   return true;
1680 }
1681 
decodeString(Token & token)1682 bool OurReader::decodeString(Token& token) {
1683   String decoded_string;
1684   if (!decodeString(token, decoded_string))
1685     return false;
1686   Value decoded(decoded_string);
1687   currentValue().swapPayload(decoded);
1688   currentValue().setOffsetStart(token.start_ - begin_);
1689   currentValue().setOffsetLimit(token.end_ - begin_);
1690   return true;
1691 }
1692 
decodeString(Token & token,String & decoded)1693 bool OurReader::decodeString(Token& token, String& decoded) {
1694   decoded.reserve(static_cast<size_t>(token.end_ - token.start_ - 2));
1695   Location current = token.start_ + 1; // skip '"'
1696   Location end = token.end_ - 1;       // do not include '"'
1697   while (current != end) {
1698     Char c = *current++;
1699     if (c == '"')
1700       break;
1701     if (c == '\\') {
1702       if (current == end)
1703         return addError("Empty escape sequence in string", token, current);
1704       Char escape = *current++;
1705       switch (escape) {
1706       case '"':
1707         decoded += '"';
1708         break;
1709       case '/':
1710         decoded += '/';
1711         break;
1712       case '\\':
1713         decoded += '\\';
1714         break;
1715       case 'b':
1716         decoded += '\b';
1717         break;
1718       case 'f':
1719         decoded += '\f';
1720         break;
1721       case 'n':
1722         decoded += '\n';
1723         break;
1724       case 'r':
1725         decoded += '\r';
1726         break;
1727       case 't':
1728         decoded += '\t';
1729         break;
1730       case 'u': {
1731         unsigned int unicode;
1732         if (!decodeUnicodeCodePoint(token, current, end, unicode))
1733           return false;
1734         decoded += codePointToUTF8(unicode);
1735       } break;
1736       default:
1737         return addError("Bad escape sequence in string", token, current);
1738       }
1739     } else {
1740       decoded += c;
1741     }
1742   }
1743   return true;
1744 }
1745 
decodeUnicodeCodePoint(Token & token,Location & current,Location end,unsigned int & unicode)1746 bool OurReader::decodeUnicodeCodePoint(Token& token, Location& current,
1747                                        Location end, unsigned int& unicode) {
1748 
1749   if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
1750     return false;
1751   if (unicode >= 0xD800 && unicode <= 0xDBFF) {
1752     // surrogate pairs
1753     if (end - current < 6)
1754       return addError(
1755           "additional six characters expected to parse unicode surrogate pair.",
1756           token, current);
1757     if (*(current++) == '\\' && *(current++) == 'u') {
1758       unsigned int surrogatePair;
1759       if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
1760         unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
1761       } else
1762         return false;
1763     } else
1764       return addError("expecting another \\u token to begin the second half of "
1765                       "a unicode surrogate pair",
1766                       token, current);
1767   }
1768   return true;
1769 }
1770 
decodeUnicodeEscapeSequence(Token & token,Location & current,Location end,unsigned int & ret_unicode)1771 bool OurReader::decodeUnicodeEscapeSequence(Token& token, Location& current,
1772                                             Location end,
1773                                             unsigned int& ret_unicode) {
1774   if (end - current < 4)
1775     return addError(
1776         "Bad unicode escape sequence in string: four digits expected.", token,
1777         current);
1778   int unicode = 0;
1779   for (int index = 0; index < 4; ++index) {
1780     Char c = *current++;
1781     unicode *= 16;
1782     if (c >= '0' && c <= '9')
1783       unicode += c - '0';
1784     else if (c >= 'a' && c <= 'f')
1785       unicode += c - 'a' + 10;
1786     else if (c >= 'A' && c <= 'F')
1787       unicode += c - 'A' + 10;
1788     else
1789       return addError(
1790           "Bad unicode escape sequence in string: hexadecimal digit expected.",
1791           token, current);
1792   }
1793   ret_unicode = static_cast<unsigned int>(unicode);
1794   return true;
1795 }
1796 
addError(const String & message,Token & token,Location extra)1797 bool OurReader::addError(const String& message, Token& token, Location extra) {
1798   ErrorInfo info;
1799   info.token_ = token;
1800   info.message_ = message;
1801   info.extra_ = extra;
1802   errors_.push_back(info);
1803   return false;
1804 }
1805 
recoverFromError(TokenType skipUntilToken)1806 bool OurReader::recoverFromError(TokenType skipUntilToken) {
1807   size_t errorCount = errors_.size();
1808   Token skip;
1809   for (;;) {
1810     if (!readToken(skip))
1811       errors_.resize(errorCount); // discard errors caused by recovery
1812     if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
1813       break;
1814   }
1815   errors_.resize(errorCount);
1816   return false;
1817 }
1818 
addErrorAndRecover(const String & message,Token & token,TokenType skipUntilToken)1819 bool OurReader::addErrorAndRecover(const String& message, Token& token,
1820                                    TokenType skipUntilToken) {
1821   addError(message, token);
1822   return recoverFromError(skipUntilToken);
1823 }
1824 
currentValue()1825 Value& OurReader::currentValue() { return *(nodes_.top()); }
1826 
getNextChar()1827 OurReader::Char OurReader::getNextChar() {
1828   if (current_ == end_)
1829     return 0;
1830   return *current_++;
1831 }
1832 
getLocationLineAndColumn(Location location,int & line,int & column) const1833 void OurReader::getLocationLineAndColumn(Location location, int& line,
1834                                          int& column) const {
1835   Location current = begin_;
1836   Location lastLineStart = current;
1837   line = 0;
1838   while (current < location && current != end_) {
1839     Char c = *current++;
1840     if (c == '\r') {
1841       if (*current == '\n')
1842         ++current;
1843       lastLineStart = current;
1844       ++line;
1845     } else if (c == '\n') {
1846       lastLineStart = current;
1847       ++line;
1848     }
1849   }
1850   // column & line start at 1
1851   column = int(location - lastLineStart) + 1;
1852   ++line;
1853 }
1854 
getLocationLineAndColumn(Location location) const1855 String OurReader::getLocationLineAndColumn(Location location) const {
1856   int line, column;
1857   getLocationLineAndColumn(location, line, column);
1858   char buffer[18 + 16 + 16 + 1];
1859   jsoncpp_snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
1860   return buffer;
1861 }
1862 
getFormattedErrorMessages() const1863 String OurReader::getFormattedErrorMessages() const {
1864   String formattedMessage;
1865   for (Errors::const_iterator itError = errors_.begin();
1866        itError != errors_.end(); ++itError) {
1867     const ErrorInfo& error = *itError;
1868     formattedMessage +=
1869         "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
1870     formattedMessage += "  " + error.message_ + "\n";
1871     if (error.extra_)
1872       formattedMessage +=
1873           "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
1874   }
1875   return formattedMessage;
1876 }
1877 
getStructuredErrors() const1878 std::vector<OurReader::StructuredError> OurReader::getStructuredErrors() const {
1879   std::vector<OurReader::StructuredError> allErrors;
1880   for (Errors::const_iterator itError = errors_.begin();
1881        itError != errors_.end(); ++itError) {
1882     const ErrorInfo& error = *itError;
1883     OurReader::StructuredError structured;
1884     structured.offset_start = error.token_.start_ - begin_;
1885     structured.offset_limit = error.token_.end_ - begin_;
1886     structured.message = error.message_;
1887     allErrors.push_back(structured);
1888   }
1889   return allErrors;
1890 }
1891 
1892 class OurCharReader : public CharReader {
1893   bool const collectComments_;
1894   OurReader reader_;
1895 
1896 public:
OurCharReader(bool collectComments,OurFeatures const & features)1897   OurCharReader(bool collectComments, OurFeatures const& features)
1898       : collectComments_(collectComments), reader_(features) {}
parse(char const * beginDoc,char const * endDoc,Value * root,String * errs)1899   bool parse(char const* beginDoc, char const* endDoc, Value* root,
1900              String* errs) JSONCPP_OVERRIDE {
1901     bool ok = reader_.parse(beginDoc, endDoc, *root, collectComments_);
1902     if (errs) {
1903       *errs = reader_.getFormattedErrorMessages();
1904     }
1905     return ok;
1906   }
1907 };
1908 
CharReaderBuilder()1909 CharReaderBuilder::CharReaderBuilder() { setDefaults(&settings_); }
~CharReaderBuilder()1910 CharReaderBuilder::~CharReaderBuilder() {}
newCharReader() const1911 CharReader* CharReaderBuilder::newCharReader() const {
1912   bool collectComments = settings_["collectComments"].asBool();
1913   OurFeatures features = OurFeatures::all();
1914   features.allowComments_ = settings_["allowComments"].asBool();
1915   features.allowTrailingCommas_ = settings_["allowTrailingCommas"].asBool();
1916   features.strictRoot_ = settings_["strictRoot"].asBool();
1917   features.allowDroppedNullPlaceholders_ =
1918       settings_["allowDroppedNullPlaceholders"].asBool();
1919   features.allowNumericKeys_ = settings_["allowNumericKeys"].asBool();
1920   features.allowSingleQuotes_ = settings_["allowSingleQuotes"].asBool();
1921 
1922   // Stack limit is always a size_t, so we get this as an unsigned int
1923   // regardless of it we have 64-bit integer support enabled.
1924   features.stackLimit_ = static_cast<size_t>(settings_["stackLimit"].asUInt());
1925   features.failIfExtra_ = settings_["failIfExtra"].asBool();
1926   features.rejectDupKeys_ = settings_["rejectDupKeys"].asBool();
1927   features.allowSpecialFloats_ = settings_["allowSpecialFloats"].asBool();
1928   features.skipBom_ = settings_["skipBom"].asBool();
1929   return new OurCharReader(collectComments, features);
1930 }
getValidReaderKeys(std::set<String> * valid_keys)1931 static void getValidReaderKeys(std::set<String>* valid_keys) {
1932   valid_keys->clear();
1933   valid_keys->insert("collectComments");
1934   valid_keys->insert("allowComments");
1935   valid_keys->insert("allowTrailingCommas");
1936   valid_keys->insert("strictRoot");
1937   valid_keys->insert("allowDroppedNullPlaceholders");
1938   valid_keys->insert("allowNumericKeys");
1939   valid_keys->insert("allowSingleQuotes");
1940   valid_keys->insert("stackLimit");
1941   valid_keys->insert("failIfExtra");
1942   valid_keys->insert("rejectDupKeys");
1943   valid_keys->insert("allowSpecialFloats");
1944   valid_keys->insert("skipBom");
1945 }
validate(Json::Value * invalid) const1946 bool CharReaderBuilder::validate(Json::Value* invalid) const {
1947   Json::Value my_invalid;
1948   if (!invalid)
1949     invalid = &my_invalid; // so we do not need to test for NULL
1950   Json::Value& inv = *invalid;
1951   std::set<String> valid_keys;
1952   getValidReaderKeys(&valid_keys);
1953   Value::Members keys = settings_.getMemberNames();
1954   size_t n = keys.size();
1955   for (size_t i = 0; i < n; ++i) {
1956     String const& key = keys[i];
1957     if (valid_keys.find(key) == valid_keys.end()) {
1958       inv[key] = settings_[key];
1959     }
1960   }
1961   return inv.empty();
1962 }
operator [](const String & key)1963 Value& CharReaderBuilder::operator[](const String& key) {
1964   return settings_[key];
1965 }
1966 // static
strictMode(Json::Value * settings)1967 void CharReaderBuilder::strictMode(Json::Value* settings) {
1968   //! [CharReaderBuilderStrictMode]
1969   (*settings)["allowComments"] = false;
1970   (*settings)["allowTrailingCommas"] = false;
1971   (*settings)["strictRoot"] = true;
1972   (*settings)["allowDroppedNullPlaceholders"] = false;
1973   (*settings)["allowNumericKeys"] = false;
1974   (*settings)["allowSingleQuotes"] = false;
1975   (*settings)["stackLimit"] = 1000;
1976   (*settings)["failIfExtra"] = true;
1977   (*settings)["rejectDupKeys"] = true;
1978   (*settings)["allowSpecialFloats"] = false;
1979   (*settings)["skipBom"] = true;
1980   //! [CharReaderBuilderStrictMode]
1981 }
1982 // static
setDefaults(Json::Value * settings)1983 void CharReaderBuilder::setDefaults(Json::Value* settings) {
1984   //! [CharReaderBuilderDefaults]
1985   (*settings)["collectComments"] = true;
1986   (*settings)["allowComments"] = true;
1987   (*settings)["allowTrailingCommas"] = true;
1988   (*settings)["strictRoot"] = false;
1989   (*settings)["allowDroppedNullPlaceholders"] = false;
1990   (*settings)["allowNumericKeys"] = false;
1991   (*settings)["allowSingleQuotes"] = false;
1992   (*settings)["stackLimit"] = 1000;
1993   (*settings)["failIfExtra"] = false;
1994   (*settings)["rejectDupKeys"] = false;
1995   (*settings)["allowSpecialFloats"] = false;
1996   (*settings)["skipBom"] = true;
1997   //! [CharReaderBuilderDefaults]
1998 }
1999 
2000 //////////////////////////////////
2001 // global functions
2002 
parseFromStream(CharReader::Factory const & fact,IStream & sin,Value * root,String * errs)2003 bool parseFromStream(CharReader::Factory const& fact, IStream& sin, Value* root,
2004                      String* errs) {
2005   OStringStream ssin;
2006   ssin << sin.rdbuf();
2007   String doc = ssin.str();
2008   char const* begin = doc.data();
2009   char const* end = begin + doc.size();
2010   // Note that we do not actually need a null-terminator.
2011   CharReaderPtr const reader(fact.newCharReader());
2012   bool ret = reader->parse(begin, end, root, errs);
2013   delete reader;
2014   return ret;
2015 }
2016 
operator >>(IStream & sin,Value & root)2017 IStream& operator>>(IStream& sin, Value& root) {
2018   CharReaderBuilder b;
2019   String errs;
2020   bool ok = parseFromStream(b, sin, &root, &errs);
2021   if (!ok) {
2022     throwRuntimeError(errs);
2023   }
2024   return sin;
2025 }
2026 
2027 } // namespace Json
2028