• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2007-2011 Baptiste Lepilleur and The JsonCpp Authors
2 // Copyright (C) 2016 InfoTeCS JSC. All rights reserved.
3 // Distributed under MIT license, or public domain if desired and
4 // recognized in your jurisdiction.
5 // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE
6 
7 #if !defined(JSON_IS_AMALGAMATION)
8 #include "json_tool.h"
9 #include <json/assertions.h>
10 #include <json/reader.h>
11 #include <json/value.h>
12 #endif // if !defined(JSON_IS_AMALGAMATION)
13 #include <cassert>
14 #include <cstring>
15 #include <iostream>
16 #include <istream>
17 #include <limits>
18 #include <memory>
19 #include <set>
20 #include <sstream>
21 #include <utility>
22 
23 #include <cstdio>
24 #if __cplusplus >= 201103L
25 
26 #if !defined(sscanf)
27 #define sscanf std::sscanf
28 #endif
29 
30 #endif //__cplusplus
31 
32 #if defined(_MSC_VER)
33 #if !defined(_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES)
34 #define _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES 1
35 #endif //_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES
36 #endif //_MSC_VER
37 
38 #if defined(_MSC_VER)
39 // Disable warning about strdup being deprecated.
40 #pragma warning(disable : 4996)
41 #endif
42 
// Nesting limit used by the deprecated Reader. Define
// JSONCPP_DEPRECATED_STACK_LIMIT as an appropriate integer at compile time to
// change it; the default is 1000.
#ifndef JSONCPP_DEPRECATED_STACK_LIMIT
#define JSONCPP_DEPRECATED_STACK_LIMIT 1000
#endif

// Compared against nodes_.size() in Reader::readValue().
static const size_t stackLimit_g = JSONCPP_DEPRECATED_STACK_LIMIT;
51 
52 namespace Json {
53 
54 typedef CharReader* CharReaderPtr;
55 
56 // Implementation of class Features
57 // ////////////////////////////////
58 
Features()59 Features::Features()
60     : allowComments_(true), strictRoot_(false),
61       allowDroppedNullPlaceholders_(false), allowNumericKeys_(false) {}
all()62 Features Features::all() { return Features(); }
63 
strictMode()64 Features Features::strictMode() {
65   Features features;
66   features.allowComments_ = false;
67   features.strictRoot_ = true;
68   features.allowDroppedNullPlaceholders_ = false;
69   features.allowNumericKeys_ = false;
70   return features;
71 }
72 
73 // Implementation of class Reader
74 // ////////////////////////////////
75 
containsNewLine(Reader::Location begin,Reader::Location end)76 bool Reader::containsNewLine(Reader::Location begin, Reader::Location end) {
77   for (; begin < end; ++begin)
78     if (*begin == '\n' || *begin == '\r')
79       return true;
80   return false;
81 }
82 
83 // Class Reader
84 // //////////////////////////////////////////////////////////////////
85 
Reader()86 Reader::Reader()
87     : errors_(), document_(), begin_(), end_(), current_(), lastValueEnd_(),
88       lastValue_(), commentsBefore_(), features_(Features::all()),
89       collectComments_() {}
90 
Reader(const Features & features)91 Reader::Reader(const Features& features)
92     : errors_(), document_(), begin_(), end_(), current_(), lastValueEnd_(),
93       lastValue_(), commentsBefore_(), features_(features), collectComments_() {
94 }
95 
parse(const std::string & document,Value & root,bool collectComments)96 bool Reader::parse(const std::string& document, Value& root,
97                    bool collectComments) {
98   document_.assign(document.begin(), document.end());
99   const char* begin = document_.c_str();
100   const char* end = begin + document_.length();
101   return parse(begin, end, root, collectComments);
102 }
103 
parse(std::istream & is,Value & root,bool collectComments)104 bool Reader::parse(std::istream& is, Value& root, bool collectComments) {
105   // std::istream_iterator<char> begin(is);
106   // std::istream_iterator<char> end;
107   // Those would allow streamed input from a file, if parse() were a
108   // template function.
109 
110   // Since String is reference-counted, this at least does not
111   // create an extra copy.
112   String doc;
113   std::getline(is, doc, static_cast<char> EOF);
114   return parse(doc.data(), doc.data() + doc.size(), root, collectComments);
115 }
116 
parse(const char * beginDoc,const char * endDoc,Value & root,bool collectComments)117 bool Reader::parse(const char* beginDoc, const char* endDoc, Value& root,
118                    bool collectComments) {
119   if (!features_.allowComments_) {
120     collectComments = false;
121   }
122 
123   begin_ = beginDoc;
124   end_ = endDoc;
125   collectComments_ = collectComments;
126   current_ = begin_;
127   lastValueEnd_ = JSONCPP_NULL;
128   lastValue_ = JSONCPP_NULL;
129   commentsBefore_.clear();
130   errors_.clear();
131   while (!nodes_.empty())
132     nodes_.pop();
133   nodes_.push(&root);
134 
135   bool successful = readValue();
136   Token token;
137   skipCommentTokens(token);
138   if (collectComments_ && !commentsBefore_.empty())
139     root.setComment(commentsBefore_, commentAfter);
140   if (features_.strictRoot_) {
141     if (!root.isArray() && !root.isObject()) {
142       // Set error location to start of doc, ideally should be first token found
143       // in doc
144       token.type_ = tokenError;
145       token.start_ = beginDoc;
146       token.end_ = endDoc;
147       addError(
148           "A valid JSON document must be either an array or an object value.",
149           token);
150       return false;
151     }
152   }
153   return successful;
154 }
155 
readValue()156 bool Reader::readValue() {
157   // readValue() may call itself only if it calls readObject() or ReadArray().
158   // These methods execute nodes_.push() just before and nodes_.pop)() just
159   // after calling readValue(). parse() executes one nodes_.push(), so > instead
160   // of >=.
161   if (nodes_.size() > stackLimit_g)
162     throwRuntimeError("Exceeded stackLimit in readValue().");
163 
164   Token token;
165   skipCommentTokens(token);
166   bool successful = true;
167 
168   if (collectComments_ && !commentsBefore_.empty()) {
169     currentValue().setComment(commentsBefore_, commentBefore);
170     commentsBefore_.clear();
171   }
172 
173   switch (token.type_) {
174   case tokenObjectBegin:
175     successful = readObject(token);
176     currentValue().setOffsetLimit(current_ - begin_);
177     break;
178   case tokenArrayBegin:
179     successful = readArray(token);
180     currentValue().setOffsetLimit(current_ - begin_);
181     break;
182   case tokenNumber:
183     successful = decodeNumber(token);
184     break;
185   case tokenString:
186     successful = decodeString(token);
187     break;
188   case tokenTrue: {
189     Value v(true);
190     currentValue().swapPayload(v);
191     currentValue().setOffsetStart(token.start_ - begin_);
192     currentValue().setOffsetLimit(token.end_ - begin_);
193   } break;
194   case tokenFalse: {
195     Value v(false);
196     currentValue().swapPayload(v);
197     currentValue().setOffsetStart(token.start_ - begin_);
198     currentValue().setOffsetLimit(token.end_ - begin_);
199   } break;
200   case tokenNull: {
201     Value v;
202     currentValue().swapPayload(v);
203     currentValue().setOffsetStart(token.start_ - begin_);
204     currentValue().setOffsetLimit(token.end_ - begin_);
205   } break;
206   case tokenArraySeparator:
207   case tokenObjectEnd:
208   case tokenArrayEnd:
209     if (features_.allowDroppedNullPlaceholders_) {
210       // "Un-read" the current token and mark the current value as a null
211       // token.
212       current_--;
213       Value v;
214       currentValue().swapPayload(v);
215       currentValue().setOffsetStart(current_ - begin_ - 1);
216       currentValue().setOffsetLimit(current_ - begin_);
217       break;
218     } // Else, fall through...
219   default:
220     currentValue().setOffsetStart(token.start_ - begin_);
221     currentValue().setOffsetLimit(token.end_ - begin_);
222     return addError("Syntax error: value, object or array expected.", token);
223   }
224 
225   if (collectComments_) {
226     lastValueEnd_ = current_;
227     lastValue_ = &currentValue();
228   }
229 
230   return successful;
231 }
232 
skipCommentTokens(Token & token)233 void Reader::skipCommentTokens(Token& token) {
234   if (features_.allowComments_) {
235     do {
236       readToken(token);
237     } while (token.type_ == tokenComment);
238   } else {
239     readToken(token);
240   }
241 }
242 
readToken(Token & token)243 bool Reader::readToken(Token& token) {
244   skipSpaces();
245   token.start_ = current_;
246   Char c = getNextChar();
247   bool ok = true;
248   switch (c) {
249   case '{':
250     token.type_ = tokenObjectBegin;
251     break;
252   case '}':
253     token.type_ = tokenObjectEnd;
254     break;
255   case '[':
256     token.type_ = tokenArrayBegin;
257     break;
258   case ']':
259     token.type_ = tokenArrayEnd;
260     break;
261   case '"':
262     token.type_ = tokenString;
263     ok = readString();
264     break;
265   case '/':
266     token.type_ = tokenComment;
267     ok = readComment();
268     break;
269   case '0':
270   case '1':
271   case '2':
272   case '3':
273   case '4':
274   case '5':
275   case '6':
276   case '7':
277   case '8':
278   case '9':
279   case '-':
280     token.type_ = tokenNumber;
281     readNumber();
282     break;
283   case 't':
284     token.type_ = tokenTrue;
285     ok = match("rue", 3);
286     break;
287   case 'f':
288     token.type_ = tokenFalse;
289     ok = match("alse", 4);
290     break;
291   case 'n':
292     token.type_ = tokenNull;
293     ok = match("ull", 3);
294     break;
295   case ',':
296     token.type_ = tokenArraySeparator;
297     break;
298   case ':':
299     token.type_ = tokenMemberSeparator;
300     break;
301   case 0:
302     token.type_ = tokenEndOfStream;
303     break;
304   default:
305     ok = false;
306     break;
307   }
308   if (!ok)
309     token.type_ = tokenError;
310   token.end_ = current_;
311   return ok;
312 }
313 
skipSpaces()314 void Reader::skipSpaces() {
315   while (current_ != end_) {
316     Char c = *current_;
317     if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
318       ++current_;
319     else
320       break;
321   }
322 }
323 
match(const Char * pattern,int patternLength)324 bool Reader::match(const Char* pattern, int patternLength) {
325   if (end_ - current_ < patternLength)
326     return false;
327   int index = patternLength;
328   while (index--)
329     if (current_[index] != pattern[index])
330       return false;
331   current_ += patternLength;
332   return true;
333 }
334 
readComment()335 bool Reader::readComment() {
336   Location commentBegin = current_ - 1;
337   Char c = getNextChar();
338   bool successful = false;
339   if (c == '*')
340     successful = readCStyleComment();
341   else if (c == '/')
342     successful = readCppStyleComment();
343   if (!successful)
344     return false;
345 
346   if (collectComments_) {
347     CommentPlacement placement = commentBefore;
348     if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
349       if (c != '*' || !containsNewLine(commentBegin, current_))
350         placement = commentAfterOnSameLine;
351     }
352 
353     addComment(commentBegin, current_, placement);
354   }
355   return true;
356 }
357 
normalizeEOL(Reader::Location begin,Reader::Location end)358 String Reader::normalizeEOL(Reader::Location begin, Reader::Location end) {
359   String normalized;
360   normalized.reserve(static_cast<size_t>(end - begin));
361   Reader::Location current = begin;
362   while (current != end) {
363     char c = *current++;
364     if (c == '\r') {
365       if (current != end && *current == '\n')
366         // convert dos EOL
367         ++current;
368       // convert Mac EOL
369       normalized += '\n';
370     } else {
371       normalized += c;
372     }
373   }
374   return normalized;
375 }
376 
addComment(Location begin,Location end,CommentPlacement placement)377 void Reader::addComment(Location begin, Location end,
378                         CommentPlacement placement) {
379   assert(collectComments_);
380   const String& normalized = normalizeEOL(begin, end);
381   if (placement == commentAfterOnSameLine) {
382     assert(lastValue_ != JSONCPP_NULL);
383     lastValue_->setComment(normalized, placement);
384   } else {
385     commentsBefore_ += normalized;
386   }
387 }
388 
readCStyleComment()389 bool Reader::readCStyleComment() {
390   while ((current_ + 1) < end_) {
391     Char c = getNextChar();
392     if (c == '*' && *current_ == '/')
393       break;
394   }
395   return getNextChar() == '/';
396 }
397 
readCppStyleComment()398 bool Reader::readCppStyleComment() {
399   while (current_ != end_) {
400     Char c = getNextChar();
401     if (c == '\n')
402       break;
403     if (c == '\r') {
404       // Consume DOS EOL. It will be normalized in addComment.
405       if (current_ != end_ && *current_ == '\n')
406         getNextChar();
407       // Break on Moc OS 9 EOL.
408       break;
409     }
410   }
411   return true;
412 }
413 
readNumber()414 void Reader::readNumber() {
415   Location p = current_;
416   char c = '0'; // stopgap for already consumed character
417   // integral part
418   while (c >= '0' && c <= '9')
419     c = (current_ = p) < end_ ? *p++ : '\0';
420   // fractional part
421   if (c == '.') {
422     c = (current_ = p) < end_ ? *p++ : '\0';
423     while (c >= '0' && c <= '9')
424       c = (current_ = p) < end_ ? *p++ : '\0';
425   }
426   // exponential part
427   if (c == 'e' || c == 'E') {
428     c = (current_ = p) < end_ ? *p++ : '\0';
429     if (c == '+' || c == '-')
430       c = (current_ = p) < end_ ? *p++ : '\0';
431     while (c >= '0' && c <= '9')
432       c = (current_ = p) < end_ ? *p++ : '\0';
433   }
434 }
435 
readString()436 bool Reader::readString() {
437   Char c = '\0';
438   while (current_ != end_) {
439     c = getNextChar();
440     if (c == '\\')
441       getNextChar();
442     else if (c == '"')
443       break;
444   }
445   return c == '"';
446 }
447 
readObject(Token & token)448 bool Reader::readObject(Token& token) {
449   Token tokenName;
450   String name;
451   Value init(objectValue);
452   currentValue().swapPayload(init);
453   currentValue().setOffsetStart(token.start_ - begin_);
454   while (readToken(tokenName)) {
455     bool initialTokenOk = true;
456     while (tokenName.type_ == tokenComment && initialTokenOk)
457       initialTokenOk = readToken(tokenName);
458     if (!initialTokenOk)
459       break;
460     if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object
461       return true;
462     name.clear();
463     if (tokenName.type_ == tokenString) {
464       if (!decodeString(tokenName, name))
465         return recoverFromError(tokenObjectEnd);
466     } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
467       Value numberName;
468       if (!decodeNumber(tokenName, numberName))
469         return recoverFromError(tokenObjectEnd);
470       name = numberName.asString();
471     } else {
472       break;
473     }
474 
475     Token colon;
476     if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
477       return addErrorAndRecover("Missing ':' after object member name", colon,
478                                 tokenObjectEnd);
479     }
480     Value& value = currentValue()[name];
481     nodes_.push(&value);
482     bool ok = readValue();
483     nodes_.pop();
484     if (!ok) // error already set
485       return recoverFromError(tokenObjectEnd);
486 
487     Token comma;
488     if (!readToken(comma) ||
489         (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
490          comma.type_ != tokenComment)) {
491       return addErrorAndRecover("Missing ',' or '}' in object declaration",
492                                 comma, tokenObjectEnd);
493     }
494     bool finalizeTokenOk = true;
495     while (comma.type_ == tokenComment && finalizeTokenOk)
496       finalizeTokenOk = readToken(comma);
497     if (comma.type_ == tokenObjectEnd)
498       return true;
499   }
500   return addErrorAndRecover("Missing '}' or object member name", tokenName,
501                             tokenObjectEnd);
502 }
503 
readArray(Token & token)504 bool Reader::readArray(Token& token) {
505   Value init(arrayValue);
506   currentValue().swapPayload(init);
507   currentValue().setOffsetStart(token.start_ - begin_);
508   skipSpaces();
509   if (current_ != end_ && *current_ == ']') // empty array
510   {
511     Token endArray;
512     readToken(endArray);
513     return true;
514   }
515   int index = 0;
516   for (;;) {
517     Value& value = currentValue()[index++];
518     nodes_.push(&value);
519     bool ok = readValue();
520     nodes_.pop();
521     if (!ok) // error already set
522       return recoverFromError(tokenArrayEnd);
523 
524     Token currentToken;
525     // Accept Comment after last item in the array.
526     ok = readToken(currentToken);
527     while (currentToken.type_ == tokenComment && ok) {
528       ok = readToken(currentToken);
529     }
530     bool badTokenType = (currentToken.type_ != tokenArraySeparator &&
531                          currentToken.type_ != tokenArrayEnd);
532     if (!ok || badTokenType) {
533       return addErrorAndRecover("Missing ',' or ']' in array declaration",
534                                 currentToken, tokenArrayEnd);
535     }
536     if (currentToken.type_ == tokenArrayEnd)
537       break;
538   }
539   return true;
540 }
541 
decodeNumber(Token & token)542 bool Reader::decodeNumber(Token& token) {
543   Value decoded;
544   if (!decodeNumber(token, decoded))
545     return false;
546   currentValue().swapPayload(decoded);
547   currentValue().setOffsetStart(token.start_ - begin_);
548   currentValue().setOffsetLimit(token.end_ - begin_);
549   return true;
550 }
551 
decodeNumber(Token & token,Value & decoded)552 bool Reader::decodeNumber(Token& token, Value& decoded) {
553   // Attempts to parse the number as an integer. If the number is
554   // larger than the maximum supported value of an integer then
555   // we decode the number as a double.
556   Location current = token.start_;
557   bool isNegative = *current == '-';
558   if (isNegative)
559     ++current;
560   // TODO: Help the compiler do the div and mod at compile time or get rid of
561   // them.
562   Value::LargestUInt maxIntegerValue =
563       isNegative ? Value::LargestUInt(Value::maxLargestInt) + 1
564                  : Value::maxLargestUInt;
565   Value::LargestUInt threshold = maxIntegerValue / 10;
566   Value::LargestUInt value = 0;
567   while (current < token.end_) {
568     Char c = *current++;
569     if (c < '0' || c > '9')
570       return decodeDouble(token, decoded);
571     Value::UInt digit(static_cast<Value::UInt>(c - '0'));
572     if (value >= threshold) {
573       // We've hit or exceeded the max value divided by 10 (rounded down). If
574       // a) we've only just touched the limit, b) this is the last digit, and
575       // c) it's small enough to fit in that rounding delta, we're okay.
576       // Otherwise treat this number as a double to avoid overflow.
577       if (value > threshold || current != token.end_ ||
578           digit > maxIntegerValue % 10) {
579         return decodeDouble(token, decoded);
580       }
581     }
582     value = value * 10 + digit;
583   }
584   if (isNegative && value == maxIntegerValue)
585     decoded = Value::minLargestInt;
586   else if (isNegative)
587     decoded = -Value::LargestInt(value);
588   else if (value <= Value::LargestUInt(Value::maxInt))
589     decoded = Value::LargestInt(value);
590   else
591     decoded = value;
592   return true;
593 }
594 
decodeDouble(Token & token)595 bool Reader::decodeDouble(Token& token) {
596   Value decoded;
597   if (!decodeDouble(token, decoded))
598     return false;
599   currentValue().swapPayload(decoded);
600   currentValue().setOffsetStart(token.start_ - begin_);
601   currentValue().setOffsetLimit(token.end_ - begin_);
602   return true;
603 }
604 
decodeDouble(Token & token,Value & decoded)605 bool Reader::decodeDouble(Token& token, Value& decoded) {
606   double value = 0;
607   String buffer(token.start_, token.end_);
608   IStringStream is(buffer);
609   if (!(is >> value))
610     return addError(
611         "'" + String(token.start_, token.end_) + "' is not a number.", token);
612   decoded = value;
613   return true;
614 }
615 
decodeString(Token & token)616 bool Reader::decodeString(Token& token) {
617   String decoded_string;
618   if (!decodeString(token, decoded_string))
619     return false;
620   Value decoded(decoded_string);
621   currentValue().swapPayload(decoded);
622   currentValue().setOffsetStart(token.start_ - begin_);
623   currentValue().setOffsetLimit(token.end_ - begin_);
624   return true;
625 }
626 
decodeString(Token & token,String & decoded)627 bool Reader::decodeString(Token& token, String& decoded) {
628   decoded.reserve(static_cast<size_t>(token.end_ - token.start_ - 2));
629   Location current = token.start_ + 1; // skip '"'
630   Location end = token.end_ - 1;       // do not include '"'
631   while (current != end) {
632     Char c = *current++;
633     if (c == '"')
634       break;
635     if (c == '\\') {
636       if (current == end)
637         return addError("Empty escape sequence in string", token, current);
638       Char escape = *current++;
639       switch (escape) {
640       case '"':
641         decoded += '"';
642         break;
643       case '/':
644         decoded += '/';
645         break;
646       case '\\':
647         decoded += '\\';
648         break;
649       case 'b':
650         decoded += '\b';
651         break;
652       case 'f':
653         decoded += '\f';
654         break;
655       case 'n':
656         decoded += '\n';
657         break;
658       case 'r':
659         decoded += '\r';
660         break;
661       case 't':
662         decoded += '\t';
663         break;
664       case 'u': {
665         unsigned int unicode;
666         if (!decodeUnicodeCodePoint(token, current, end, unicode))
667           return false;
668         decoded += codePointToUTF8(unicode);
669       } break;
670       default:
671         return addError("Bad escape sequence in string", token, current);
672       }
673     } else {
674       decoded += c;
675     }
676   }
677   return true;
678 }
679 
decodeUnicodeCodePoint(Token & token,Location & current,Location end,unsigned int & unicode)680 bool Reader::decodeUnicodeCodePoint(Token& token, Location& current,
681                                     Location end, unsigned int& unicode) {
682 
683   if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
684     return false;
685   if (unicode >= 0xD800 && unicode <= 0xDBFF) {
686     // surrogate pairs
687     if (end - current < 6)
688       return addError(
689           "additional six characters expected to parse unicode surrogate pair.",
690           token, current);
691     if (*(current++) == '\\' && *(current++) == 'u') {
692       unsigned int surrogatePair;
693       if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
694         unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
695       } else
696         return false;
697     } else
698       return addError("expecting another \\u token to begin the second half of "
699                       "a unicode surrogate pair",
700                       token, current);
701   }
702   return true;
703 }
704 
decodeUnicodeEscapeSequence(Token & token,Location & current,Location end,unsigned int & ret_unicode)705 bool Reader::decodeUnicodeEscapeSequence(Token& token, Location& current,
706                                          Location end,
707                                          unsigned int& ret_unicode) {
708   if (end - current < 4)
709     return addError(
710         "Bad unicode escape sequence in string: four digits expected.", token,
711         current);
712   int unicode = 0;
713   for (int index = 0; index < 4; ++index) {
714     Char c = *current++;
715     unicode *= 16;
716     if (c >= '0' && c <= '9')
717       unicode += c - '0';
718     else if (c >= 'a' && c <= 'f')
719       unicode += c - 'a' + 10;
720     else if (c >= 'A' && c <= 'F')
721       unicode += c - 'A' + 10;
722     else
723       return addError(
724           "Bad unicode escape sequence in string: hexadecimal digit expected.",
725           token, current);
726   }
727   ret_unicode = static_cast<unsigned int>(unicode);
728   return true;
729 }
730 
addError(const String & message,Token & token,Location extra)731 bool Reader::addError(const String& message, Token& token, Location extra) {
732   ErrorInfo info;
733   info.token_ = token;
734   info.message_ = message;
735   info.extra_ = extra;
736   errors_.push_back(info);
737   return false;
738 }
739 
recoverFromError(TokenType skipUntilToken)740 bool Reader::recoverFromError(TokenType skipUntilToken) {
741   size_t const errorCount = errors_.size();
742   Token skip;
743   for (;;) {
744     if (!readToken(skip))
745       errors_.resize(errorCount); // discard errors caused by recovery
746     if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
747       break;
748   }
749   errors_.resize(errorCount);
750   return false;
751 }
752 
addErrorAndRecover(const String & message,Token & token,TokenType skipUntilToken)753 bool Reader::addErrorAndRecover(const String& message, Token& token,
754                                 TokenType skipUntilToken) {
755   addError(message, token);
756   return recoverFromError(skipUntilToken);
757 }
758 
currentValue()759 Value& Reader::currentValue() { return *(nodes_.top()); }
760 
getNextChar()761 Reader::Char Reader::getNextChar() {
762   if (current_ == end_)
763     return 0;
764   return *current_++;
765 }
766 
getLocationLineAndColumn(Location location,int & line,int & column) const767 void Reader::getLocationLineAndColumn(Location location, int& line,
768                                       int& column) const {
769   Location current = begin_;
770   Location lastLineStart = current;
771   line = 0;
772   while (current < location && current != end_) {
773     Char c = *current++;
774     if (c == '\r') {
775       if (*current == '\n')
776         ++current;
777       lastLineStart = current;
778       ++line;
779     } else if (c == '\n') {
780       lastLineStart = current;
781       ++line;
782     }
783   }
784   // column & line start at 1
785   column = int(location - lastLineStart) + 1;
786   ++line;
787 }
788 
getLocationLineAndColumn(Location location) const789 String Reader::getLocationLineAndColumn(Location location) const {
790   int line, column;
791   getLocationLineAndColumn(location, line, column);
792   char buffer[18 + 16 + 16 + 1];
793   jsoncpp_snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
794   return buffer;
795 }
796 
797 // Deprecated. Preserved for backward compatibility
getFormatedErrorMessages() const798 String Reader::getFormatedErrorMessages() const {
799   return getFormattedErrorMessages();
800 }
801 
getFormattedErrorMessages() const802 String Reader::getFormattedErrorMessages() const {
803   String formattedMessage;
804   for (Errors::const_iterator itError = errors_.begin();
805        itError != errors_.end(); ++itError) {
806     const ErrorInfo& error = *itError;
807     formattedMessage +=
808         "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
809     formattedMessage += "  " + error.message_ + "\n";
810     if (error.extra_)
811       formattedMessage +=
812           "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
813   }
814   return formattedMessage;
815 }
816 
getStructuredErrors() const817 std::vector<Reader::StructuredError> Reader::getStructuredErrors() const {
818   std::vector<Reader::StructuredError> allErrors;
819   for (Errors::const_iterator itError = errors_.begin();
820        itError != errors_.end(); ++itError) {
821     const ErrorInfo& error = *itError;
822     Reader::StructuredError structured;
823     structured.offset_start = error.token_.start_ - begin_;
824     structured.offset_limit = error.token_.end_ - begin_;
825     structured.message = error.message_;
826     allErrors.push_back(structured);
827   }
828   return allErrors;
829 }
830 
pushError(const Value & value,const String & message)831 bool Reader::pushError(const Value& value, const String& message) {
832   ptrdiff_t const length = end_ - begin_;
833   if (value.getOffsetStart() > length || value.getOffsetLimit() > length)
834     return false;
835   Token token;
836   token.type_ = tokenError;
837   token.start_ = begin_ + value.getOffsetStart();
838   token.end_ = begin_ + value.getOffsetLimit();
839   ErrorInfo info;
840   info.token_ = token;
841   info.message_ = message;
842   info.extra_ = JSONCPP_NULL;
843   errors_.push_back(info);
844   return true;
845 }
846 
pushError(const Value & value,const String & message,const Value & extra)847 bool Reader::pushError(const Value& value, const String& message,
848                        const Value& extra) {
849   ptrdiff_t const length = end_ - begin_;
850   if (value.getOffsetStart() > length || value.getOffsetLimit() > length ||
851       extra.getOffsetLimit() > length)
852     return false;
853   Token token;
854   token.type_ = tokenError;
855   token.start_ = begin_ + value.getOffsetStart();
856   token.end_ = begin_ + value.getOffsetLimit();
857   ErrorInfo info;
858   info.token_ = token;
859   info.message_ = message;
860   info.extra_ = begin_ + extra.getOffsetStart();
861   errors_.push_back(info);
862   return true;
863 }
864 
good() const865 bool Reader::good() const { return errors_.empty(); }
866 
// Originally copied from the Features class (now deprecated), used internally
// for features implementation.
//
// Plain bundle of reader options. The class declares no constructor, so the
// members hold defined values only when the object is value-initialized.
class OurFeatures {
public:
  // Option flags. NOTE(review): the exact effect of each flag is defined by
  // the reader code that consults it, which is not part of this declaration.
  bool allowComments_;
  bool allowTrailingCommas_;
  bool strictRoot_;
  bool allowDroppedNullPlaceholders_;
  bool allowNumericKeys_;
  bool allowSingleQuotes_;
  bool failIfExtra_;
  bool rejectDupKeys_;
  bool allowSpecialFloats_;
  bool skipBom_;
  // Presumably the maximum nesting depth, by analogy with stackLimit_g —
  // confirm against the reader that uses it.
  size_t stackLimit_;

  // Factory; defined out of line.
  static OurFeatures all();
}; // OurFeatures
884 
all()885 OurFeatures OurFeatures::all() { return OurFeatures(); }
886 
887 // Implementation of class Reader
888 // ////////////////////////////////
889 
890 // Originally copied from the Reader class (now deprecated), used internally
891 // for implementing JSON reading.
892 class OurReader {
893 public:
894   typedef char Char;
895   typedef const Char* Location;
896   struct StructuredError {
897     ptrdiff_t offset_start;
898     ptrdiff_t offset_limit;
899     String message;
900   };
901 
902   JSONCPP_OP_EXPLICIT OurReader(OurFeatures const& features);
903   bool parse(const char* beginDoc, const char* endDoc, Value& root,
904              bool collectComments = true);
905   String getFormattedErrorMessages() const;
906   std::vector<StructuredError> getStructuredErrors() const;
907 
908 private:
909   OurReader(OurReader const&);      // no impl
910   void operator=(OurReader const&); // no impl
911 
912   enum TokenType {
913     tokenEndOfStream = 0,
914     tokenObjectBegin,
915     tokenObjectEnd,
916     tokenArrayBegin,
917     tokenArrayEnd,
918     tokenString,
919     tokenNumber,
920     tokenTrue,
921     tokenFalse,
922     tokenNull,
923     tokenNaN,
924     tokenPosInf,
925     tokenNegInf,
926     tokenArraySeparator,
927     tokenMemberSeparator,
928     tokenComment,
929     tokenError
930   };
931 
932   class Token {
933   public:
934     TokenType type_;
935     Location start_;
936     Location end_;
937   };
938 
939   class ErrorInfo {
940   public:
941     Token token_;
942     String message_;
943     Location extra_;
944   };
945 
946   typedef std::deque<ErrorInfo> Errors;
947 
948   bool readToken(Token& token);
949   void skipSpaces();
950   void skipBom(bool skipBom);
951   bool match(const Char* pattern, int patternLength);
952   bool readComment();
953   bool readCStyleComment(bool* containsNewLineResult);
954   bool readCppStyleComment();
955   bool readString();
956   bool readStringSingleQuote();
957   bool readNumber(bool checkInf);
958   bool readValue();
959   bool readObject(Token& token);
960   bool readArray(Token& token);
961   bool decodeNumber(Token& token);
962   bool decodeNumber(Token& token, Value& decoded);
963   bool decodeString(Token& token);
964   bool decodeString(Token& token, String& decoded);
965   bool decodeDouble(Token& token);
966   bool decodeDouble(Token& token, Value& decoded);
967   bool decodeUnicodeCodePoint(Token& token, Location& current, Location end,
968                               unsigned int& unicode);
969   bool decodeUnicodeEscapeSequence(Token& token, Location& current,
970                                    Location end, unsigned int& unicode);
971   bool addError(const String& message, Token& token,
972                 Location extra = JSONCPP_NULL);
973   bool recoverFromError(TokenType skipUntilToken);
974   bool addErrorAndRecover(const String& message, Token& token,
975                           TokenType skipUntilToken);
976   void skipUntilSpace();
977   Value& currentValue();
978   Char getNextChar();
979   void getLocationLineAndColumn(Location location, int& line,
980                                 int& column) const;
981   String getLocationLineAndColumn(Location location) const;
982   void addComment(Location begin, Location end, CommentPlacement placement);
983   void skipCommentTokens(Token& token);
984 
985   static String normalizeEOL(Location begin, Location end);
986   static bool containsNewLine(Location begin, Location end);
987 
988   typedef std::stack<Value*> Nodes;
989 
990   Nodes nodes_;
991   Errors errors_;
992   String document_;
993   Location begin_;
994   Location end_;
995   Location current_;
996   Location lastValueEnd_;
997   Value* lastValue_;
998   bool lastValueHasAComment_;
999   String commentsBefore_;
1000 
1001   OurFeatures const features_;
1002   bool collectComments_;
1003 }; // OurReader
1004 
1005 // complete copy of Read impl, for OurReader
1006 
containsNewLine(OurReader::Location begin,OurReader::Location end)1007 bool OurReader::containsNewLine(OurReader::Location begin,
1008                                 OurReader::Location end) {
1009   for (; begin < end; ++begin)
1010     if (*begin == '\n' || *begin == '\r')
1011       return true;
1012   return false;
1013 }
1014 
OurReader(OurFeatures const & features)1015 OurReader::OurReader(OurFeatures const& features)
1016     : errors_(), document_(), begin_(JSONCPP_NULL), end_(JSONCPP_NULL),
1017       current_(JSONCPP_NULL), lastValueEnd_(JSONCPP_NULL),
1018       lastValue_(JSONCPP_NULL), lastValueHasAComment_(false), commentsBefore_(),
1019       features_(features), collectComments_(false) {}
1020 
parse(const char * beginDoc,const char * endDoc,Value & root,bool collectComments)1021 bool OurReader::parse(const char* beginDoc, const char* endDoc, Value& root,
1022                       bool collectComments) {
1023   if (!features_.allowComments_) {
1024     collectComments = false;
1025   }
1026 
1027   begin_ = beginDoc;
1028   end_ = endDoc;
1029   collectComments_ = collectComments;
1030   current_ = begin_;
1031   lastValueEnd_ = JSONCPP_NULL;
1032   lastValue_ = JSONCPP_NULL;
1033   commentsBefore_.clear();
1034   errors_.clear();
1035   while (!nodes_.empty())
1036     nodes_.pop();
1037   nodes_.push(&root);
1038 
1039   // skip byte order mark if it exists at the beginning of the UTF-8 text.
1040   skipBom(features_.skipBom_);
1041   bool successful = readValue();
1042   nodes_.pop();
1043   Token token;
1044   skipCommentTokens(token);
1045   if (features_.failIfExtra_ && (token.type_ != tokenEndOfStream)) {
1046     addError("Extra non-whitespace after JSON value.", token);
1047     return false;
1048   }
1049   if (collectComments_ && !commentsBefore_.empty())
1050     root.setComment(commentsBefore_, commentAfter);
1051   if (features_.strictRoot_) {
1052     if (!root.isArray() && !root.isObject()) {
1053       // Set error location to start of doc, ideally should be first token found
1054       // in doc
1055       token.type_ = tokenError;
1056       token.start_ = beginDoc;
1057       token.end_ = endDoc;
1058       addError(
1059           "A valid JSON document must be either an array or an object value.",
1060           token);
1061       return false;
1062     }
1063   }
1064   return successful;
1065 }
1066 
readValue()1067 bool OurReader::readValue() {
1068   //  To preserve the old behaviour we cast size_t to int.
1069   if (nodes_.size() > features_.stackLimit_)
1070     throwRuntimeError("Exceeded stackLimit in readValue().");
1071   Token token;
1072   skipCommentTokens(token);
1073   bool successful = true;
1074 
1075   if (collectComments_ && !commentsBefore_.empty()) {
1076     currentValue().setComment(commentsBefore_, commentBefore);
1077     commentsBefore_.clear();
1078   }
1079 
1080   switch (token.type_) {
1081   case tokenObjectBegin:
1082     successful = readObject(token);
1083     currentValue().setOffsetLimit(current_ - begin_);
1084     break;
1085   case tokenArrayBegin:
1086     successful = readArray(token);
1087     currentValue().setOffsetLimit(current_ - begin_);
1088     break;
1089   case tokenNumber:
1090     successful = decodeNumber(token);
1091     break;
1092   case tokenString:
1093     successful = decodeString(token);
1094     break;
1095   case tokenTrue: {
1096     Value v(true);
1097     currentValue().swapPayload(v);
1098     currentValue().setOffsetStart(token.start_ - begin_);
1099     currentValue().setOffsetLimit(token.end_ - begin_);
1100   } break;
1101   case tokenFalse: {
1102     Value v(false);
1103     currentValue().swapPayload(v);
1104     currentValue().setOffsetStart(token.start_ - begin_);
1105     currentValue().setOffsetLimit(token.end_ - begin_);
1106   } break;
1107   case tokenNull: {
1108     Value v;
1109     currentValue().swapPayload(v);
1110     currentValue().setOffsetStart(token.start_ - begin_);
1111     currentValue().setOffsetLimit(token.end_ - begin_);
1112   } break;
1113   case tokenNaN: {
1114     Value v(std::numeric_limits<double>::quiet_NaN());
1115     currentValue().swapPayload(v);
1116     currentValue().setOffsetStart(token.start_ - begin_);
1117     currentValue().setOffsetLimit(token.end_ - begin_);
1118   } break;
1119   case tokenPosInf: {
1120     Value v(std::numeric_limits<double>::infinity());
1121     currentValue().swapPayload(v);
1122     currentValue().setOffsetStart(token.start_ - begin_);
1123     currentValue().setOffsetLimit(token.end_ - begin_);
1124   } break;
1125   case tokenNegInf: {
1126     Value v(-std::numeric_limits<double>::infinity());
1127     currentValue().swapPayload(v);
1128     currentValue().setOffsetStart(token.start_ - begin_);
1129     currentValue().setOffsetLimit(token.end_ - begin_);
1130   } break;
1131   case tokenArraySeparator:
1132   case tokenObjectEnd:
1133   case tokenArrayEnd:
1134     if (features_.allowDroppedNullPlaceholders_) {
1135       // "Un-read" the current token and mark the current value as a null
1136       // token.
1137       current_--;
1138       Value v;
1139       currentValue().swapPayload(v);
1140       currentValue().setOffsetStart(current_ - begin_ - 1);
1141       currentValue().setOffsetLimit(current_ - begin_);
1142       break;
1143     } // else, fall through ...
1144   default:
1145     currentValue().setOffsetStart(token.start_ - begin_);
1146     currentValue().setOffsetLimit(token.end_ - begin_);
1147     return addError("Syntax error: value, object or array expected.", token);
1148   }
1149 
1150   if (collectComments_) {
1151     lastValueEnd_ = current_;
1152     lastValueHasAComment_ = false;
1153     lastValue_ = &currentValue();
1154   }
1155 
1156   return successful;
1157 }
1158 
skipCommentTokens(Token & token)1159 void OurReader::skipCommentTokens(Token& token) {
1160   if (features_.allowComments_) {
1161     do {
1162       readToken(token);
1163     } while (token.type_ == tokenComment);
1164   } else {
1165     readToken(token);
1166   }
1167 }
1168 
readToken(Token & token)1169 bool OurReader::readToken(Token& token) {
1170   skipSpaces();
1171   token.start_ = current_;
1172   Char c = getNextChar();
1173   bool ok = true;
1174   switch (c) {
1175   case '{':
1176     token.type_ = tokenObjectBegin;
1177     break;
1178   case '}':
1179     token.type_ = tokenObjectEnd;
1180     break;
1181   case '[':
1182     token.type_ = tokenArrayBegin;
1183     break;
1184   case ']':
1185     token.type_ = tokenArrayEnd;
1186     break;
1187   case '"':
1188     token.type_ = tokenString;
1189     ok = readString();
1190     break;
1191   case '\'':
1192     if (features_.allowSingleQuotes_) {
1193       token.type_ = tokenString;
1194       ok = readStringSingleQuote();
1195       break;
1196     } // else fall through
1197   case '/':
1198     token.type_ = tokenComment;
1199     ok = readComment();
1200     break;
1201   case '0':
1202   case '1':
1203   case '2':
1204   case '3':
1205   case '4':
1206   case '5':
1207   case '6':
1208   case '7':
1209   case '8':
1210   case '9':
1211     token.type_ = tokenNumber;
1212     readNumber(false);
1213     break;
1214   case '-':
1215     if (readNumber(true)) {
1216       token.type_ = tokenNumber;
1217     } else {
1218       token.type_ = tokenNegInf;
1219       ok = features_.allowSpecialFloats_ && match("nfinity", 7);
1220     }
1221     break;
1222   case '+':
1223     if (readNumber(true)) {
1224       token.type_ = tokenNumber;
1225     } else {
1226       token.type_ = tokenPosInf;
1227       ok = features_.allowSpecialFloats_ && match("nfinity", 7);
1228     }
1229     break;
1230   case 't':
1231     token.type_ = tokenTrue;
1232     ok = match("rue", 3);
1233     break;
1234   case 'f':
1235     token.type_ = tokenFalse;
1236     ok = match("alse", 4);
1237     break;
1238   case 'n':
1239     token.type_ = tokenNull;
1240     ok = match("ull", 3);
1241     break;
1242   case 'N':
1243     if (features_.allowSpecialFloats_) {
1244       token.type_ = tokenNaN;
1245       ok = match("aN", 2);
1246     } else {
1247       ok = false;
1248     }
1249     break;
1250   case 'I':
1251     if (features_.allowSpecialFloats_) {
1252       token.type_ = tokenPosInf;
1253       ok = match("nfinity", 7);
1254     } else {
1255       ok = false;
1256     }
1257     break;
1258   case ',':
1259     token.type_ = tokenArraySeparator;
1260     break;
1261   case ':':
1262     token.type_ = tokenMemberSeparator;
1263     break;
1264   case 0:
1265     token.type_ = tokenEndOfStream;
1266     break;
1267   default:
1268     ok = false;
1269     break;
1270   }
1271   if (!ok)
1272     token.type_ = tokenError;
1273   token.end_ = current_;
1274   return ok;
1275 }
1276 
skipSpaces()1277 void OurReader::skipSpaces() {
1278   while (current_ != end_) {
1279     Char c = *current_;
1280     if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
1281       ++current_;
1282     else
1283       break;
1284   }
1285 }
1286 
skipBom(bool skipBom)1287 void OurReader::skipBom(bool skipBom) {
1288   // The default behavior is to skip BOM.
1289   if (skipBom) {
1290     if (strncmp(begin_, "\xEF\xBB\xBF", 3) == 0) {
1291       begin_ += 3;
1292       current_ = begin_;
1293     }
1294   }
1295 }
1296 
match(const Char * pattern,int patternLength)1297 bool OurReader::match(const Char* pattern, int patternLength) {
1298   if (end_ - current_ < patternLength)
1299     return false;
1300   int index = patternLength;
1301   while (index--)
1302     if (current_[index] != pattern[index])
1303       return false;
1304   current_ += patternLength;
1305   return true;
1306 }
1307 
readComment()1308 bool OurReader::readComment() {
1309   const Location commentBegin = current_ - 1;
1310   const Char c = getNextChar();
1311   bool successful = false;
1312   bool cStyleWithEmbeddedNewline = false;
1313 
1314   const bool isCStyleComment = (c == '*');
1315   const bool isCppStyleComment = (c == '/');
1316   if (isCStyleComment) {
1317     successful = readCStyleComment(&cStyleWithEmbeddedNewline);
1318   } else if (isCppStyleComment) {
1319     successful = readCppStyleComment();
1320   }
1321 
1322   if (!successful)
1323     return false;
1324 
1325   if (collectComments_) {
1326     CommentPlacement placement = commentBefore;
1327 
1328     if (!lastValueHasAComment_) {
1329       if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
1330         if (isCppStyleComment || !cStyleWithEmbeddedNewline) {
1331           placement = commentAfterOnSameLine;
1332           lastValueHasAComment_ = true;
1333         }
1334       }
1335     }
1336 
1337     addComment(commentBegin, current_, placement);
1338   }
1339   return true;
1340 }
1341 
normalizeEOL(OurReader::Location begin,OurReader::Location end)1342 String OurReader::normalizeEOL(OurReader::Location begin,
1343                                OurReader::Location end) {
1344   String normalized;
1345   normalized.reserve(static_cast<size_t>(end - begin));
1346   OurReader::Location current = begin;
1347   while (current != end) {
1348     char c = *current++;
1349     if (c == '\r') {
1350       if (current != end && *current == '\n')
1351         // convert dos EOL
1352         ++current;
1353       // convert Mac EOL
1354       normalized += '\n';
1355     } else {
1356       normalized += c;
1357     }
1358   }
1359   return normalized;
1360 }
1361 
addComment(Location begin,Location end,CommentPlacement placement)1362 void OurReader::addComment(Location begin, Location end,
1363                            CommentPlacement placement) {
1364   assert(collectComments_);
1365   const String& normalized = normalizeEOL(begin, end);
1366   if (placement == commentAfterOnSameLine) {
1367     assert(lastValue_ != JSONCPP_NULL);
1368     lastValue_->setComment(normalized, placement);
1369   } else {
1370     commentsBefore_ += normalized;
1371   }
1372 }
1373 
readCStyleComment(bool * containsNewLineResult)1374 bool OurReader::readCStyleComment(bool* containsNewLineResult) {
1375   *containsNewLineResult = false;
1376 
1377   while ((current_ + 1) < end_) {
1378     Char c = getNextChar();
1379     if (c == '*' && *current_ == '/')
1380       break;
1381     if (c == '\n')
1382       *containsNewLineResult = true;
1383   }
1384 
1385   return getNextChar() == '/';
1386 }
1387 
readCppStyleComment()1388 bool OurReader::readCppStyleComment() {
1389   while (current_ != end_) {
1390     Char c = getNextChar();
1391     if (c == '\n')
1392       break;
1393     if (c == '\r') {
1394       // Consume DOS EOL. It will be normalized in addComment.
1395       if (current_ != end_ && *current_ == '\n')
1396         getNextChar();
1397       // Break on Moc OS 9 EOL.
1398       break;
1399     }
1400   }
1401   return true;
1402 }
1403 
readNumber(bool checkInf)1404 bool OurReader::readNumber(bool checkInf) {
1405   Location p = current_;
1406   if (checkInf && p != end_ && *p == 'I') {
1407     current_ = ++p;
1408     return false;
1409   }
1410   char c = '0'; // stopgap for already consumed character
1411   // integral part
1412   while (c >= '0' && c <= '9')
1413     c = (current_ = p) < end_ ? *p++ : '\0';
1414   // fractional part
1415   if (c == '.') {
1416     c = (current_ = p) < end_ ? *p++ : '\0';
1417     while (c >= '0' && c <= '9')
1418       c = (current_ = p) < end_ ? *p++ : '\0';
1419   }
1420   // exponential part
1421   if (c == 'e' || c == 'E') {
1422     c = (current_ = p) < end_ ? *p++ : '\0';
1423     if (c == '+' || c == '-')
1424       c = (current_ = p) < end_ ? *p++ : '\0';
1425     while (c >= '0' && c <= '9')
1426       c = (current_ = p) < end_ ? *p++ : '\0';
1427   }
1428   return true;
1429 }
readString()1430 bool OurReader::readString() {
1431   Char c = 0;
1432   while (current_ != end_) {
1433     c = getNextChar();
1434     if (c == '\\')
1435       getNextChar();
1436     else if (c == '"')
1437       break;
1438   }
1439   return c == '"';
1440 }
1441 
readStringSingleQuote()1442 bool OurReader::readStringSingleQuote() {
1443   Char c = 0;
1444   while (current_ != end_) {
1445     c = getNextChar();
1446     if (c == '\\')
1447       getNextChar();
1448     else if (c == '\'')
1449       break;
1450   }
1451   return c == '\'';
1452 }
1453 
readObject(Token & token)1454 bool OurReader::readObject(Token& token) {
1455   Token tokenName;
1456   String name;
1457   Value init(objectValue);
1458   currentValue().swapPayload(init);
1459   currentValue().setOffsetStart(token.start_ - begin_);
1460   while (readToken(tokenName)) {
1461     bool initialTokenOk = true;
1462     while (tokenName.type_ == tokenComment && initialTokenOk)
1463       initialTokenOk = readToken(tokenName);
1464     if (!initialTokenOk)
1465       break;
1466     if (tokenName.type_ == tokenObjectEnd &&
1467         (name.empty() ||
1468          features_.allowTrailingCommas_)) // empty object or trailing comma
1469       return true;
1470     name.clear();
1471     if (tokenName.type_ == tokenString) {
1472       if (!decodeString(tokenName, name))
1473         return recoverFromError(tokenObjectEnd);
1474     } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
1475       Value numberName;
1476       if (!decodeNumber(tokenName, numberName))
1477         return recoverFromError(tokenObjectEnd);
1478       name = numberName.asString();
1479     } else {
1480       break;
1481     }
1482     if (name.length() >= (1U << 30))
1483       throwRuntimeError("keylength >= 2^30");
1484     if (features_.rejectDupKeys_ && currentValue().isMember(name)) {
1485       String msg = "Duplicate key: '" + name + "'";
1486       return addErrorAndRecover(msg, tokenName, tokenObjectEnd);
1487     }
1488 
1489     Token colon;
1490     if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
1491       return addErrorAndRecover("Missing ':' after object member name", colon,
1492                                 tokenObjectEnd);
1493     }
1494     Value& value = currentValue()[name];
1495     nodes_.push(&value);
1496     bool ok = readValue();
1497     nodes_.pop();
1498     if (!ok) // error already set
1499       return recoverFromError(tokenObjectEnd);
1500 
1501     Token comma;
1502     if (!readToken(comma) ||
1503         (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
1504          comma.type_ != tokenComment)) {
1505       return addErrorAndRecover("Missing ',' or '}' in object declaration",
1506                                 comma, tokenObjectEnd);
1507     }
1508     bool finalizeTokenOk = true;
1509     while (comma.type_ == tokenComment && finalizeTokenOk)
1510       finalizeTokenOk = readToken(comma);
1511     if (comma.type_ == tokenObjectEnd)
1512       return true;
1513   }
1514   return addErrorAndRecover("Missing '}' or object member name", tokenName,
1515                             tokenObjectEnd);
1516 }
1517 
readArray(Token & token)1518 bool OurReader::readArray(Token& token) {
1519   Value init(arrayValue);
1520   currentValue().swapPayload(init);
1521   currentValue().setOffsetStart(token.start_ - begin_);
1522   int index = 0;
1523   for (;;) {
1524     skipSpaces();
1525     if (current_ != end_ && *current_ == ']' &&
1526         (index == 0 ||
1527          (features_.allowTrailingCommas_ &&
1528           !features_.allowDroppedNullPlaceholders_))) // empty array or trailing
1529                                                       // comma
1530     {
1531       Token endArray;
1532       readToken(endArray);
1533       return true;
1534     }
1535     Value& value = currentValue()[index++];
1536     nodes_.push(&value);
1537     bool ok = readValue();
1538     nodes_.pop();
1539     if (!ok) // error already set
1540       return recoverFromError(tokenArrayEnd);
1541 
1542     Token currentToken;
1543     // Accept Comment after last item in the array.
1544     ok = readToken(currentToken);
1545     while (currentToken.type_ == tokenComment && ok) {
1546       ok = readToken(currentToken);
1547     }
1548     bool badTokenType = (currentToken.type_ != tokenArraySeparator &&
1549                          currentToken.type_ != tokenArrayEnd);
1550     if (!ok || badTokenType) {
1551       return addErrorAndRecover("Missing ',' or ']' in array declaration",
1552                                 currentToken, tokenArrayEnd);
1553     }
1554     if (currentToken.type_ == tokenArrayEnd)
1555       break;
1556   }
1557   return true;
1558 }
1559 
decodeNumber(Token & token)1560 bool OurReader::decodeNumber(Token& token) {
1561   Value decoded;
1562   if (!decodeNumber(token, decoded))
1563     return false;
1564   currentValue().swapPayload(decoded);
1565   currentValue().setOffsetStart(token.start_ - begin_);
1566   currentValue().setOffsetLimit(token.end_ - begin_);
1567   return true;
1568 }
1569 
decodeNumber(Token & token,Value & decoded)1570 bool OurReader::decodeNumber(Token& token, Value& decoded) {
1571   // Attempts to parse the number as an integer. If the number is
1572   // larger than the maximum supported value of an integer then
1573   // we decode the number as a double.
1574   Location current = token.start_;
1575   const bool isNegative = *current == '-';
1576   if (isNegative) {
1577     ++current;
1578   }
1579 
1580   // We assume we can represent the largest and smallest integer types as
1581   // unsigned integers with separate sign. This is only true if they can fit
1582   // into an unsigned integer.
1583   JSONCPP_STATIC_ASSERT(LargestUInt(Value::maxLargestInt) <=
1584                             Value::maxLargestUInt,
1585                         "Int must be smaller than Uint");
1586   // We need to convert minLargestInt into a positive number. The easiest way
1587   // to do this conversion is to assume our "threshold" value of minLargestInt
1588   // divided by 10 can fit in maxLargestInt when absolute valued. This should
1589   // be a safe assumption.
1590   JSONCPP_STATIC_ASSERT(
1591       Value::minLargestInt <= -Value::maxLargestInt,
1592       "The absolute value of minLargestInt must ve greater than or"
1593       "equal to maxLargestInt");
1594 
1595   JSONCPP_STATIC_ASSERT(
1596       Value::minLargestInt / 10 >= -Value::maxLargestInt,
1597       "The absolute value of minLargestInt must be only 1 magnitude"
1598       "larger than maxLargestInt");
1599 
1600   static JSONCPP_CONST Value::LargestUInt positive_threshold =
1601       Value::maxLargestUInt / 10;
1602   static JSONCPP_CONST Value::UInt positive_last_digit =
1603       Value::maxLargestUInt % 10;
1604 
1605   // For the negative values, we have to be more careful. Since typically
1606   // -Value::minLargestInt will cause an overflow, we first divide by 10 and
1607   // then take the inverse. This assumes that minLargestInt is only a single
1608   // power of 10 different in magnitude, which we check above. For the last
1609   // digit, we take the modulus before negating for the same reason.
1610   static JSONCPP_CONST Value::LargestUInt negative_threshold =
1611       Value::LargestUInt(-(Value::minLargestInt / 10));
1612   static JSONCPP_CONST Value::UInt negative_last_digit =
1613       Value::UInt(-(Value::minLargestInt % 10));
1614 
1615   const Value::LargestUInt threshold =
1616       isNegative ? negative_threshold : positive_threshold;
1617   const Value::UInt max_last_digit =
1618       isNegative ? negative_last_digit : positive_last_digit;
1619 
1620   Value::LargestUInt value = 0;
1621   while (current < token.end_) {
1622     Char c = *current++;
1623     if (c < '0' || c > '9')
1624       return decodeDouble(token, decoded);
1625 
1626     const Value::UInt digit(static_cast<Value::UInt>(c - '0'));
1627     if (value >= threshold) {
1628       // We've hit or exceeded the max value divided by 10 (rounded down). If
1629       // a) we've only just touched the limit, meaing value == threshold,
1630       // b) this is the last digit, or
1631       // c) it's small enough to fit in that rounding delta, we're okay.
1632       // Otherwise treat this number as a double to avoid overflow.
1633       if (value > threshold || current != token.end_ ||
1634           digit > max_last_digit) {
1635         return decodeDouble(token, decoded);
1636       }
1637     }
1638     value = value * 10 + digit;
1639   }
1640 
1641   if (isNegative) {
1642     // We use the same magnitude assumption here, just in case.
1643     const Value::UInt last_digit = static_cast<Value::UInt>(value % 10);
1644     decoded = -Value::LargestInt(value / 10) * 10 - last_digit;
1645   } else if (value <= Value::LargestUInt(Value::maxLargestInt)) {
1646     decoded = Value::LargestInt(value);
1647   } else {
1648     decoded = value;
1649   }
1650 
1651   return true;
1652 }
1653 
decodeDouble(Token & token)1654 bool OurReader::decodeDouble(Token& token) {
1655   Value decoded;
1656   if (!decodeDouble(token, decoded))
1657     return false;
1658   currentValue().swapPayload(decoded);
1659   currentValue().setOffsetStart(token.start_ - begin_);
1660   currentValue().setOffsetLimit(token.end_ - begin_);
1661   return true;
1662 }
1663 
decodeDouble(Token & token,Value & decoded)1664 bool OurReader::decodeDouble(Token& token, Value& decoded) {
1665   double value = 0;
1666   const String buffer(token.start_, token.end_);
1667   IStringStream is(buffer);
1668   if (!(is >> value)) {
1669     return addError(
1670         "'" + String(token.start_, token.end_) + "' is not a number.", token);
1671   }
1672   decoded = value;
1673   return true;
1674 }
1675 
decodeString(Token & token)1676 bool OurReader::decodeString(Token& token) {
1677   String decoded_string;
1678   if (!decodeString(token, decoded_string))
1679     return false;
1680   Value decoded(decoded_string);
1681   currentValue().swapPayload(decoded);
1682   currentValue().setOffsetStart(token.start_ - begin_);
1683   currentValue().setOffsetLimit(token.end_ - begin_);
1684   return true;
1685 }
1686 
decodeString(Token & token,String & decoded)1687 bool OurReader::decodeString(Token& token, String& decoded) {
1688   decoded.reserve(static_cast<size_t>(token.end_ - token.start_ - 2));
1689   Location current = token.start_ + 1; // skip '"'
1690   Location end = token.end_ - 1;       // do not include '"'
1691   while (current != end) {
1692     Char c = *current++;
1693     if (c == '"')
1694       break;
1695     if (c == '\\') {
1696       if (current == end)
1697         return addError("Empty escape sequence in string", token, current);
1698       Char escape = *current++;
1699       switch (escape) {
1700       case '"':
1701         decoded += '"';
1702         break;
1703       case '/':
1704         decoded += '/';
1705         break;
1706       case '\\':
1707         decoded += '\\';
1708         break;
1709       case 'b':
1710         decoded += '\b';
1711         break;
1712       case 'f':
1713         decoded += '\f';
1714         break;
1715       case 'n':
1716         decoded += '\n';
1717         break;
1718       case 'r':
1719         decoded += '\r';
1720         break;
1721       case 't':
1722         decoded += '\t';
1723         break;
1724       case 'u': {
1725         unsigned int unicode;
1726         if (!decodeUnicodeCodePoint(token, current, end, unicode))
1727           return false;
1728         decoded += codePointToUTF8(unicode);
1729       } break;
1730       default:
1731         return addError("Bad escape sequence in string", token, current);
1732       }
1733     } else {
1734       decoded += c;
1735     }
1736   }
1737   return true;
1738 }
1739 
decodeUnicodeCodePoint(Token & token,Location & current,Location end,unsigned int & unicode)1740 bool OurReader::decodeUnicodeCodePoint(Token& token, Location& current,
1741                                        Location end, unsigned int& unicode) {
1742 
1743   if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
1744     return false;
1745   if (unicode >= 0xD800 && unicode <= 0xDBFF) {
1746     // surrogate pairs
1747     if (end - current < 6)
1748       return addError(
1749           "additional six characters expected to parse unicode surrogate pair.",
1750           token, current);
1751     if (*(current++) == '\\' && *(current++) == 'u') {
1752       unsigned int surrogatePair;
1753       if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
1754         unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
1755       } else
1756         return false;
1757     } else
1758       return addError("expecting another \\u token to begin the second half of "
1759                       "a unicode surrogate pair",
1760                       token, current);
1761   }
1762   return true;
1763 }
1764 
decodeUnicodeEscapeSequence(Token & token,Location & current,Location end,unsigned int & ret_unicode)1765 bool OurReader::decodeUnicodeEscapeSequence(Token& token, Location& current,
1766                                             Location end,
1767                                             unsigned int& ret_unicode) {
1768   if (end - current < 4)
1769     return addError(
1770         "Bad unicode escape sequence in string: four digits expected.", token,
1771         current);
1772   int unicode = 0;
1773   for (int index = 0; index < 4; ++index) {
1774     Char c = *current++;
1775     unicode *= 16;
1776     if (c >= '0' && c <= '9')
1777       unicode += c - '0';
1778     else if (c >= 'a' && c <= 'f')
1779       unicode += c - 'a' + 10;
1780     else if (c >= 'A' && c <= 'F')
1781       unicode += c - 'A' + 10;
1782     else
1783       return addError(
1784           "Bad unicode escape sequence in string: hexadecimal digit expected.",
1785           token, current);
1786   }
1787   ret_unicode = static_cast<unsigned int>(unicode);
1788   return true;
1789 }
1790 
addError(const String & message,Token & token,Location extra)1791 bool OurReader::addError(const String& message, Token& token, Location extra) {
1792   ErrorInfo info;
1793   info.token_ = token;
1794   info.message_ = message;
1795   info.extra_ = extra;
1796   errors_.push_back(info);
1797   return false;
1798 }
1799 
recoverFromError(TokenType skipUntilToken)1800 bool OurReader::recoverFromError(TokenType skipUntilToken) {
1801   size_t errorCount = errors_.size();
1802   Token skip;
1803   for (;;) {
1804     if (!readToken(skip))
1805       errors_.resize(errorCount); // discard errors caused by recovery
1806     if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
1807       break;
1808   }
1809   errors_.resize(errorCount);
1810   return false;
1811 }
1812 
addErrorAndRecover(const String & message,Token & token,TokenType skipUntilToken)1813 bool OurReader::addErrorAndRecover(const String& message, Token& token,
1814                                    TokenType skipUntilToken) {
1815   addError(message, token);
1816   return recoverFromError(skipUntilToken);
1817 }
1818 
currentValue()1819 Value& OurReader::currentValue() { return *(nodes_.top()); }
1820 
getNextChar()1821 OurReader::Char OurReader::getNextChar() {
1822   if (current_ == end_)
1823     return 0;
1824   return *current_++;
1825 }
1826 
getLocationLineAndColumn(Location location,int & line,int & column) const1827 void OurReader::getLocationLineAndColumn(Location location, int& line,
1828                                          int& column) const {
1829   Location current = begin_;
1830   Location lastLineStart = current;
1831   line = 0;
1832   while (current < location && current != end_) {
1833     Char c = *current++;
1834     if (c == '\r') {
1835       if (*current == '\n')
1836         ++current;
1837       lastLineStart = current;
1838       ++line;
1839     } else if (c == '\n') {
1840       lastLineStart = current;
1841       ++line;
1842     }
1843   }
1844   // column & line start at 1
1845   column = int(location - lastLineStart) + 1;
1846   ++line;
1847 }
1848 
getLocationLineAndColumn(Location location) const1849 String OurReader::getLocationLineAndColumn(Location location) const {
1850   int line, column;
1851   getLocationLineAndColumn(location, line, column);
1852   char buffer[18 + 16 + 16 + 1];
1853   jsoncpp_snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
1854   return buffer;
1855 }
1856 
getFormattedErrorMessages() const1857 String OurReader::getFormattedErrorMessages() const {
1858   String formattedMessage;
1859   for (Errors::const_iterator itError = errors_.begin();
1860        itError != errors_.end(); ++itError) {
1861     const ErrorInfo& error = *itError;
1862     formattedMessage +=
1863         "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
1864     formattedMessage += "  " + error.message_ + "\n";
1865     if (error.extra_)
1866       formattedMessage +=
1867           "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
1868   }
1869   return formattedMessage;
1870 }
1871 
getStructuredErrors() const1872 std::vector<OurReader::StructuredError> OurReader::getStructuredErrors() const {
1873   std::vector<OurReader::StructuredError> allErrors;
1874   for (Errors::const_iterator itError = errors_.begin();
1875        itError != errors_.end(); ++itError) {
1876     const ErrorInfo& error = *itError;
1877     OurReader::StructuredError structured;
1878     structured.offset_start = error.token_.start_ - begin_;
1879     structured.offset_limit = error.token_.end_ - begin_;
1880     structured.message = error.message_;
1881     allErrors.push_back(structured);
1882   }
1883   return allErrors;
1884 }
1885 
1886 class OurCharReader : public CharReader {
1887   bool const collectComments_;
1888   OurReader reader_;
1889 
1890 public:
OurCharReader(bool collectComments,OurFeatures const & features)1891   OurCharReader(bool collectComments, OurFeatures const& features)
1892       : collectComments_(collectComments), reader_(features) {}
parse(char const * beginDoc,char const * endDoc,Value * root,String * errs)1893   bool parse(char const* beginDoc, char const* endDoc, Value* root,
1894              String* errs) JSONCPP_OVERRIDE {
1895     bool ok = reader_.parse(beginDoc, endDoc, *root, collectComments_);
1896     if (errs) {
1897       *errs = reader_.getFormattedErrorMessages();
1898     }
1899     return ok;
1900   }
1901 };
1902 
CharReaderBuilder()1903 CharReaderBuilder::CharReaderBuilder() { setDefaults(&settings_); }
~CharReaderBuilder()1904 CharReaderBuilder::~CharReaderBuilder() {}
newCharReader() const1905 CharReader* CharReaderBuilder::newCharReader() const {
1906   bool collectComments = settings_["collectComments"].asBool();
1907   OurFeatures features = OurFeatures::all();
1908   features.allowComments_ = settings_["allowComments"].asBool();
1909   features.allowTrailingCommas_ = settings_["allowTrailingCommas"].asBool();
1910   features.strictRoot_ = settings_["strictRoot"].asBool();
1911   features.allowDroppedNullPlaceholders_ =
1912       settings_["allowDroppedNullPlaceholders"].asBool();
1913   features.allowNumericKeys_ = settings_["allowNumericKeys"].asBool();
1914   features.allowSingleQuotes_ = settings_["allowSingleQuotes"].asBool();
1915 
1916   // Stack limit is always a size_t, so we get this as an unsigned int
1917   // regardless of it we have 64-bit integer support enabled.
1918   features.stackLimit_ = static_cast<size_t>(settings_["stackLimit"].asUInt());
1919   features.failIfExtra_ = settings_["failIfExtra"].asBool();
1920   features.rejectDupKeys_ = settings_["rejectDupKeys"].asBool();
1921   features.allowSpecialFloats_ = settings_["allowSpecialFloats"].asBool();
1922   features.skipBom_ = settings_["skipBom"].asBool();
1923   return new OurCharReader(collectComments, features);
1924 }
getValidReaderKeys(std::set<String> * valid_keys)1925 static void getValidReaderKeys(std::set<String>* valid_keys) {
1926   valid_keys->clear();
1927   valid_keys->insert("collectComments");
1928   valid_keys->insert("allowComments");
1929   valid_keys->insert("allowTrailingCommas");
1930   valid_keys->insert("strictRoot");
1931   valid_keys->insert("allowDroppedNullPlaceholders");
1932   valid_keys->insert("allowNumericKeys");
1933   valid_keys->insert("allowSingleQuotes");
1934   valid_keys->insert("stackLimit");
1935   valid_keys->insert("failIfExtra");
1936   valid_keys->insert("rejectDupKeys");
1937   valid_keys->insert("allowSpecialFloats");
1938   valid_keys->insert("skipBom");
1939 }
validate(Json::Value * invalid) const1940 bool CharReaderBuilder::validate(Json::Value* invalid) const {
1941   Json::Value my_invalid;
1942   if (!invalid)
1943     invalid = &my_invalid; // so we do not need to test for NULL
1944   Json::Value& inv = *invalid;
1945   std::set<String> valid_keys;
1946   getValidReaderKeys(&valid_keys);
1947   Value::Members keys = settings_.getMemberNames();
1948   size_t n = keys.size();
1949   for (size_t i = 0; i < n; ++i) {
1950     String const& key = keys[i];
1951     if (valid_keys.find(key) == valid_keys.end()) {
1952       inv[key] = settings_[key];
1953     }
1954   }
1955   return inv.empty();
1956 }
operator [](const String & key)1957 Value& CharReaderBuilder::operator[](const String& key) {
1958   return settings_[key];
1959 }
1960 // static
strictMode(Json::Value * settings)1961 void CharReaderBuilder::strictMode(Json::Value* settings) {
1962   //! [CharReaderBuilderStrictMode]
1963   (*settings)["allowComments"] = false;
1964   (*settings)["allowTrailingCommas"] = false;
1965   (*settings)["strictRoot"] = true;
1966   (*settings)["allowDroppedNullPlaceholders"] = false;
1967   (*settings)["allowNumericKeys"] = false;
1968   (*settings)["allowSingleQuotes"] = false;
1969   (*settings)["stackLimit"] = 1000;
1970   (*settings)["failIfExtra"] = true;
1971   (*settings)["rejectDupKeys"] = true;
1972   (*settings)["allowSpecialFloats"] = false;
1973   (*settings)["skipBom"] = true;
1974   //! [CharReaderBuilderStrictMode]
1975 }
1976 // static
setDefaults(Json::Value * settings)1977 void CharReaderBuilder::setDefaults(Json::Value* settings) {
1978   //! [CharReaderBuilderDefaults]
1979   (*settings)["collectComments"] = true;
1980   (*settings)["allowComments"] = true;
1981   (*settings)["allowTrailingCommas"] = true;
1982   (*settings)["strictRoot"] = false;
1983   (*settings)["allowDroppedNullPlaceholders"] = false;
1984   (*settings)["allowNumericKeys"] = false;
1985   (*settings)["allowSingleQuotes"] = false;
1986   (*settings)["stackLimit"] = 1000;
1987   (*settings)["failIfExtra"] = false;
1988   (*settings)["rejectDupKeys"] = false;
1989   (*settings)["allowSpecialFloats"] = false;
1990   (*settings)["skipBom"] = true;
1991   //! [CharReaderBuilderDefaults]
1992 }
1993 
1994 //////////////////////////////////
1995 // global functions
1996 
parseFromStream(CharReader::Factory const & fact,IStream & sin,Value * root,String * errs)1997 bool parseFromStream(CharReader::Factory const& fact, IStream& sin, Value* root,
1998                      String* errs) {
1999   OStringStream ssin;
2000   ssin << sin.rdbuf();
2001   String doc = ssin.str();
2002   char const* begin = doc.data();
2003   char const* end = begin + doc.size();
2004   // Note that we do not actually need a null-terminator.
2005   CharReaderPtr const reader(fact.newCharReader());
2006   bool ret = reader->parse(begin, end, root, errs);
2007   delete reader;
2008   return ret;
2009 }
2010 
operator >>(IStream & sin,Value & root)2011 IStream& operator>>(IStream& sin, Value& root) {
2012   CharReaderBuilder b;
2013   String errs;
2014   bool ok = parseFromStream(b, sin, &root, &errs);
2015   if (!ok) {
2016     throwRuntimeError(errs);
2017   }
2018   return sin;
2019 }
2020 
2021 } // namespace Json
2022