• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2007-2011 Baptiste Lepilleur and The JsonCpp Authors
2 // Copyright (C) 2016 InfoTeCS JSC. All rights reserved.
3 // Distributed under MIT license, or public domain if desired and
4 // recognized in your jurisdiction.
5 // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE
6 
7 #if !defined(JSON_IS_AMALGAMATION)
8 #include "json_tool.h"
9 #include <json/assertions.h>
10 #include <json/reader.h>
11 #include <json/value.h>
12 #endif // if !defined(JSON_IS_AMALGAMATION)
13 #include <cassert>
14 #include <cstring>
15 #include <iostream>
16 #include <istream>
17 #include <limits>
18 #include <memory>
19 #include <set>
20 #include <sstream>
21 #include <utility>
22 
23 #include <cstdio>
24 #if __cplusplus >= 201103L
25 
26 #if !defined(sscanf)
27 #define sscanf std::sscanf
28 #endif
29 
30 #endif //__cplusplus
31 
32 #if defined(_MSC_VER)
33 #if !defined(_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES)
34 #define _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES 1
35 #endif //_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES
36 #endif //_MSC_VER
37 
38 #if defined(_MSC_VER)
39 // Disable warning about strdup being deprecated.
40 #pragma warning(disable : 4996)
41 #endif
42 
43 // Define JSONCPP_DEPRECATED_STACK_LIMIT as an appropriate integer at compile
44 // time to change the stack limit
45 #if !defined(JSONCPP_DEPRECATED_STACK_LIMIT)
46 #define JSONCPP_DEPRECATED_STACK_LIMIT 1000
47 #endif
48 
49 static size_t const stackLimit_g =
50     JSONCPP_DEPRECATED_STACK_LIMIT; // see readValue()
51 
52 namespace Json {
53 
54 typedef CharReader* CharReaderPtr;
55 
56 // Implementation of class Features
57 // ////////////////////////////////
58 
Features()59 Features::Features()
60     : allowComments_(true), strictRoot_(false),
61       allowDroppedNullPlaceholders_(false), allowNumericKeys_(false) {}
all()62 Features Features::all() { return Features(); }
63 
strictMode()64 Features Features::strictMode() {
65   Features features;
66   features.allowComments_ = false;
67   features.strictRoot_ = true;
68   features.allowDroppedNullPlaceholders_ = false;
69   features.allowNumericKeys_ = false;
70   return features;
71 }
72 
73 // Implementation of class Reader
74 // ////////////////////////////////
75 
containsNewLine(Reader::Location begin,Reader::Location end)76 bool Reader::containsNewLine(Reader::Location begin, Reader::Location end) {
77   for (; begin < end; ++begin)
78     if (*begin == '\n' || *begin == '\r')
79       return true;
80   return false;
81 }
82 
83 // Class Reader
84 // //////////////////////////////////////////////////////////////////
85 
Reader()86 Reader::Reader()
87     : errors_(), document_(), begin_(), end_(), current_(), lastValueEnd_(),
88       lastValue_(), commentsBefore_(), features_(Features::all()),
89       collectComments_() {}
90 
Reader(const Features & features)91 Reader::Reader(const Features& features)
92     : errors_(), document_(), begin_(), end_(), current_(), lastValueEnd_(),
93       lastValue_(), commentsBefore_(), features_(features), collectComments_() {
94 }
95 
parse(const std::string & document,Value & root,bool collectComments)96 bool Reader::parse(const std::string& document, Value& root,
97                    bool collectComments) {
98   document_.assign(document.begin(), document.end());
99   const char* begin = document_.c_str();
100   const char* end = begin + document_.length();
101   return parse(begin, end, root, collectComments);
102 }
103 
parse(std::istream & is,Value & root,bool collectComments)104 bool Reader::parse(std::istream& is, Value& root, bool collectComments) {
105   // std::istream_iterator<char> begin(is);
106   // std::istream_iterator<char> end;
107   // Those would allow streamed input from a file, if parse() were a
108   // template function.
109 
110   // Since String is reference-counted, this at least does not
111   // create an extra copy.
112   String doc;
113   std::getline(is, doc, static_cast<char> EOF);
114   return parse(doc.data(), doc.data() + doc.size(), root, collectComments);
115 }
116 
parse(const char * beginDoc,const char * endDoc,Value & root,bool collectComments)117 bool Reader::parse(const char* beginDoc, const char* endDoc, Value& root,
118                    bool collectComments) {
119   if (!features_.allowComments_) {
120     collectComments = false;
121   }
122 
123   begin_ = beginDoc;
124   end_ = endDoc;
125   collectComments_ = collectComments;
126   current_ = begin_;
127   lastValueEnd_ = JSONCPP_NULL;
128   lastValue_ = JSONCPP_NULL;
129   commentsBefore_.clear();
130   errors_.clear();
131   while (!nodes_.empty())
132     nodes_.pop();
133   nodes_.push(&root);
134 
135   bool successful = readValue();
136   Token token;
137   skipCommentTokens(token);
138   if (collectComments_ && !commentsBefore_.empty())
139     root.setComment(commentsBefore_, commentAfter);
140   if (features_.strictRoot_) {
141     if (!root.isArray() && !root.isObject()) {
142       // Set error location to start of doc, ideally should be first token found
143       // in doc
144       token.type_ = tokenError;
145       token.start_ = beginDoc;
146       token.end_ = endDoc;
147       addError(
148           "A valid JSON document must be either an array or an object value.",
149           token);
150       return false;
151     }
152   }
153   return successful;
154 }
155 
readValue()156 bool Reader::readValue() {
157   // readValue() may call itself only if it calls readObject() or ReadArray().
158   // These methods execute nodes_.push() just before and nodes_.pop)() just
159   // after calling readValue(). parse() executes one nodes_.push(), so > instead
160   // of >=.
161   if (nodes_.size() > stackLimit_g)
162     throwRuntimeError("Exceeded stackLimit in readValue().");
163 
164   Token token;
165   skipCommentTokens(token);
166   bool successful = true;
167 
168   if (collectComments_ && !commentsBefore_.empty()) {
169     currentValue().setComment(commentsBefore_, commentBefore);
170     commentsBefore_.clear();
171   }
172 
173   switch (token.type_) {
174   case tokenObjectBegin:
175     successful = readObject(token);
176     currentValue().setOffsetLimit(current_ - begin_);
177     break;
178   case tokenArrayBegin:
179     successful = readArray(token);
180     currentValue().setOffsetLimit(current_ - begin_);
181     break;
182   case tokenNumber:
183     successful = decodeNumber(token);
184     break;
185   case tokenString:
186     successful = decodeString(token);
187     break;
188   case tokenTrue: {
189     Value v(true);
190     currentValue().swapPayload(v);
191     currentValue().setOffsetStart(token.start_ - begin_);
192     currentValue().setOffsetLimit(token.end_ - begin_);
193   } break;
194   case tokenFalse: {
195     Value v(false);
196     currentValue().swapPayload(v);
197     currentValue().setOffsetStart(token.start_ - begin_);
198     currentValue().setOffsetLimit(token.end_ - begin_);
199   } break;
200   case tokenNull: {
201     Value v;
202     currentValue().swapPayload(v);
203     currentValue().setOffsetStart(token.start_ - begin_);
204     currentValue().setOffsetLimit(token.end_ - begin_);
205   } break;
206   case tokenArraySeparator:
207   case tokenObjectEnd:
208   case tokenArrayEnd:
209     if (features_.allowDroppedNullPlaceholders_) {
210       // "Un-read" the current token and mark the current value as a null
211       // token.
212       current_--;
213       Value v;
214       currentValue().swapPayload(v);
215       currentValue().setOffsetStart(current_ - begin_ - 1);
216       currentValue().setOffsetLimit(current_ - begin_);
217       break;
218     } // Else, fall through...
219   default:
220     currentValue().setOffsetStart(token.start_ - begin_);
221     currentValue().setOffsetLimit(token.end_ - begin_);
222     return addError("Syntax error: value, object or array expected.", token);
223   }
224 
225   if (collectComments_) {
226     lastValueEnd_ = current_;
227     lastValue_ = &currentValue();
228   }
229 
230   return successful;
231 }
232 
skipCommentTokens(Token & token)233 void Reader::skipCommentTokens(Token& token) {
234   if (features_.allowComments_) {
235     do {
236       readToken(token);
237     } while (token.type_ == tokenComment);
238   } else {
239     readToken(token);
240   }
241 }
242 
readToken(Token & token)243 bool Reader::readToken(Token& token) {
244   skipSpaces();
245   token.start_ = current_;
246   Char c = getNextChar();
247   bool ok = true;
248   switch (c) {
249   case '{':
250     token.type_ = tokenObjectBegin;
251     break;
252   case '}':
253     token.type_ = tokenObjectEnd;
254     break;
255   case '[':
256     token.type_ = tokenArrayBegin;
257     break;
258   case ']':
259     token.type_ = tokenArrayEnd;
260     break;
261   case '"':
262     token.type_ = tokenString;
263     ok = readString();
264     break;
265   case '/':
266     token.type_ = tokenComment;
267     ok = readComment();
268     break;
269   case '0':
270   case '1':
271   case '2':
272   case '3':
273   case '4':
274   case '5':
275   case '6':
276   case '7':
277   case '8':
278   case '9':
279   case '-':
280     token.type_ = tokenNumber;
281     readNumber();
282     break;
283   case 't':
284     token.type_ = tokenTrue;
285     ok = match("rue", 3);
286     break;
287   case 'f':
288     token.type_ = tokenFalse;
289     ok = match("alse", 4);
290     break;
291   case 'n':
292     token.type_ = tokenNull;
293     ok = match("ull", 3);
294     break;
295   case ',':
296     token.type_ = tokenArraySeparator;
297     break;
298   case ':':
299     token.type_ = tokenMemberSeparator;
300     break;
301   case 0:
302     token.type_ = tokenEndOfStream;
303     break;
304   default:
305     ok = false;
306     break;
307   }
308   if (!ok)
309     token.type_ = tokenError;
310   token.end_ = current_;
311   return ok;
312 }
313 
skipSpaces()314 void Reader::skipSpaces() {
315   while (current_ != end_) {
316     Char c = *current_;
317     if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
318       ++current_;
319     else
320       break;
321   }
322 }
323 
match(const Char * pattern,int patternLength)324 bool Reader::match(const Char* pattern, int patternLength) {
325   if (end_ - current_ < patternLength)
326     return false;
327   int index = patternLength;
328   while (index--)
329     if (current_[index] != pattern[index])
330       return false;
331   current_ += patternLength;
332   return true;
333 }
334 
readComment()335 bool Reader::readComment() {
336   Location commentBegin = current_ - 1;
337   Char c = getNextChar();
338   bool successful = false;
339   if (c == '*')
340     successful = readCStyleComment();
341   else if (c == '/')
342     successful = readCppStyleComment();
343   if (!successful)
344     return false;
345 
346   if (collectComments_) {
347     CommentPlacement placement = commentBefore;
348     if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
349       if (c != '*' || !containsNewLine(commentBegin, current_))
350         placement = commentAfterOnSameLine;
351     }
352 
353     addComment(commentBegin, current_, placement);
354   }
355   return true;
356 }
357 
normalizeEOL(Reader::Location begin,Reader::Location end)358 String Reader::normalizeEOL(Reader::Location begin, Reader::Location end) {
359   String normalized;
360   normalized.reserve(static_cast<size_t>(end - begin));
361   Reader::Location current = begin;
362   while (current != end) {
363     char c = *current++;
364     if (c == '\r') {
365       if (current != end && *current == '\n')
366         // convert dos EOL
367         ++current;
368       // convert Mac EOL
369       normalized += '\n';
370     } else {
371       normalized += c;
372     }
373   }
374   return normalized;
375 }
376 
addComment(Location begin,Location end,CommentPlacement placement)377 void Reader::addComment(Location begin, Location end,
378                         CommentPlacement placement) {
379   assert(collectComments_);
380   const String& normalized = normalizeEOL(begin, end);
381   if (placement == commentAfterOnSameLine) {
382     assert(lastValue_ != JSONCPP_NULL);
383     lastValue_->setComment(normalized, placement);
384   } else {
385     commentsBefore_ += normalized;
386   }
387 }
388 
readCStyleComment()389 bool Reader::readCStyleComment() {
390   while ((current_ + 1) < end_) {
391     Char c = getNextChar();
392     if (c == '*' && *current_ == '/')
393       break;
394   }
395   return getNextChar() == '/';
396 }
397 
readCppStyleComment()398 bool Reader::readCppStyleComment() {
399   while (current_ != end_) {
400     Char c = getNextChar();
401     if (c == '\n')
402       break;
403     if (c == '\r') {
404       // Consume DOS EOL. It will be normalized in addComment.
405       if (current_ != end_ && *current_ == '\n')
406         getNextChar();
407       // Break on Moc OS 9 EOL.
408       break;
409     }
410   }
411   return true;
412 }
413 
readNumber()414 void Reader::readNumber() {
415   Location p = current_;
416   char c = '0'; // stopgap for already consumed character
417   // integral part
418   while (c >= '0' && c <= '9')
419     c = (current_ = p) < end_ ? *p++ : '\0';
420   // fractional part
421   if (c == '.') {
422     c = (current_ = p) < end_ ? *p++ : '\0';
423     while (c >= '0' && c <= '9')
424       c = (current_ = p) < end_ ? *p++ : '\0';
425   }
426   // exponential part
427   if (c == 'e' || c == 'E') {
428     c = (current_ = p) < end_ ? *p++ : '\0';
429     if (c == '+' || c == '-')
430       c = (current_ = p) < end_ ? *p++ : '\0';
431     while (c >= '0' && c <= '9')
432       c = (current_ = p) < end_ ? *p++ : '\0';
433   }
434 }
435 
readString()436 bool Reader::readString() {
437   Char c = '\0';
438   while (current_ != end_) {
439     c = getNextChar();
440     if (c == '\\')
441       getNextChar();
442     else if (c == '"')
443       break;
444   }
445   return c == '"';
446 }
447 
readObject(Token & token)448 bool Reader::readObject(Token& token) {
449   Token tokenName;
450   String name;
451   Value init(objectValue);
452   currentValue().swapPayload(init);
453   currentValue().setOffsetStart(token.start_ - begin_);
454   while (readToken(tokenName)) {
455     bool initialTokenOk = true;
456     while (tokenName.type_ == tokenComment && initialTokenOk)
457       initialTokenOk = readToken(tokenName);
458     if (!initialTokenOk)
459       break;
460     if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object
461       return true;
462     name.clear();
463     if (tokenName.type_ == tokenString) {
464       if (!decodeString(tokenName, name))
465         return recoverFromError(tokenObjectEnd);
466     } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
467       Value numberName;
468       if (!decodeNumber(tokenName, numberName))
469         return recoverFromError(tokenObjectEnd);
470       name = numberName.asString();
471     } else {
472       break;
473     }
474 
475     Token colon;
476     if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
477       return addErrorAndRecover("Missing ':' after object member name", colon,
478                                 tokenObjectEnd);
479     }
480     Value& value = currentValue()[name];
481     nodes_.push(&value);
482     bool ok = readValue();
483     nodes_.pop();
484     if (!ok) // error already set
485       return recoverFromError(tokenObjectEnd);
486 
487     Token comma;
488     if (!readToken(comma) ||
489         (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
490          comma.type_ != tokenComment)) {
491       return addErrorAndRecover("Missing ',' or '}' in object declaration",
492                                 comma, tokenObjectEnd);
493     }
494     bool finalizeTokenOk = true;
495     while (comma.type_ == tokenComment && finalizeTokenOk)
496       finalizeTokenOk = readToken(comma);
497     if (comma.type_ == tokenObjectEnd)
498       return true;
499   }
500   return addErrorAndRecover("Missing '}' or object member name", tokenName,
501                             tokenObjectEnd);
502 }
503 
readArray(Token & token)504 bool Reader::readArray(Token& token) {
505   Value init(arrayValue);
506   currentValue().swapPayload(init);
507   currentValue().setOffsetStart(token.start_ - begin_);
508   skipSpaces();
509   if (current_ != end_ && *current_ == ']') // empty array
510   {
511     Token endArray;
512     readToken(endArray);
513     return true;
514   }
515   int index = 0;
516   for (;;) {
517     Value& value = currentValue()[index++];
518     nodes_.push(&value);
519     bool ok = readValue();
520     nodes_.pop();
521     if (!ok) // error already set
522       return recoverFromError(tokenArrayEnd);
523 
524     Token currentToken;
525     // Accept Comment after last item in the array.
526     ok = readToken(currentToken);
527     while (currentToken.type_ == tokenComment && ok) {
528       ok = readToken(currentToken);
529     }
530     bool badTokenType = (currentToken.type_ != tokenArraySeparator &&
531                          currentToken.type_ != tokenArrayEnd);
532     if (!ok || badTokenType) {
533       return addErrorAndRecover("Missing ',' or ']' in array declaration",
534                                 currentToken, tokenArrayEnd);
535     }
536     if (currentToken.type_ == tokenArrayEnd)
537       break;
538   }
539   return true;
540 }
541 
decodeNumber(Token & token)542 bool Reader::decodeNumber(Token& token) {
543   Value decoded;
544   if (!decodeNumber(token, decoded))
545     return false;
546   currentValue().swapPayload(decoded);
547   currentValue().setOffsetStart(token.start_ - begin_);
548   currentValue().setOffsetLimit(token.end_ - begin_);
549   return true;
550 }
551 
decodeNumber(Token & token,Value & decoded)552 bool Reader::decodeNumber(Token& token, Value& decoded) {
553   // Attempts to parse the number as an integer. If the number is
554   // larger than the maximum supported value of an integer then
555   // we decode the number as a double.
556   Location current = token.start_;
557   bool isNegative = *current == '-';
558   if (isNegative)
559     ++current;
560   // TODO: Help the compiler do the div and mod at compile time or get rid of
561   // them.
562   Value::LargestUInt maxIntegerValue =
563       isNegative ? Value::LargestUInt(Value::maxLargestInt) + 1
564                  : Value::maxLargestUInt;
565   Value::LargestUInt threshold = maxIntegerValue / 10;
566   Value::LargestUInt value = 0;
567   while (current < token.end_) {
568     Char c = *current++;
569     if (c < '0' || c > '9')
570       return decodeDouble(token, decoded);
571     Value::UInt digit(static_cast<Value::UInt>(c - '0'));
572     if (value >= threshold) {
573       // We've hit or exceeded the max value divided by 10 (rounded down). If
574       // a) we've only just touched the limit, b) this is the last digit, and
575       // c) it's small enough to fit in that rounding delta, we're okay.
576       // Otherwise treat this number as a double to avoid overflow.
577       if (value > threshold || current != token.end_ ||
578           digit > maxIntegerValue % 10) {
579         return decodeDouble(token, decoded);
580       }
581     }
582     value = value * 10 + digit;
583   }
584   if (isNegative && value == maxIntegerValue)
585     decoded = Value::minLargestInt;
586   else if (isNegative)
587     decoded = -Value::LargestInt(value);
588   else if (value <= Value::LargestUInt(Value::maxInt))
589     decoded = Value::LargestInt(value);
590   else
591     decoded = value;
592   return true;
593 }
594 
decodeDouble(Token & token)595 bool Reader::decodeDouble(Token& token) {
596   Value decoded;
597   if (!decodeDouble(token, decoded))
598     return false;
599   currentValue().swapPayload(decoded);
600   currentValue().setOffsetStart(token.start_ - begin_);
601   currentValue().setOffsetLimit(token.end_ - begin_);
602   return true;
603 }
604 
decodeDouble(Token & token,Value & decoded)605 bool Reader::decodeDouble(Token& token, Value& decoded) {
606   double value = 0;
607   String buffer(token.start_, token.end_);
608   IStringStream is(buffer);
609   if (!(is >> value))
610     return addError(
611         "'" + String(token.start_, token.end_) + "' is not a number.", token);
612   decoded = value;
613   return true;
614 }
615 
decodeString(Token & token)616 bool Reader::decodeString(Token& token) {
617   String decoded_string;
618   if (!decodeString(token, decoded_string))
619     return false;
620   Value decoded(decoded_string);
621   currentValue().swapPayload(decoded);
622   currentValue().setOffsetStart(token.start_ - begin_);
623   currentValue().setOffsetLimit(token.end_ - begin_);
624   return true;
625 }
626 
decodeString(Token & token,String & decoded)627 bool Reader::decodeString(Token& token, String& decoded) {
628   decoded.reserve(static_cast<size_t>(token.end_ - token.start_ - 2));
629   Location current = token.start_ + 1; // skip '"'
630   Location end = token.end_ - 1;       // do not include '"'
631   while (current != end) {
632     Char c = *current++;
633     if (c == '"')
634       break;
635     if (c == '\\') {
636       if (current == end)
637         return addError("Empty escape sequence in string", token, current);
638       Char escape = *current++;
639       switch (escape) {
640       case '"':
641         decoded += '"';
642         break;
643       case '/':
644         decoded += '/';
645         break;
646       case '\\':
647         decoded += '\\';
648         break;
649       case 'b':
650         decoded += '\b';
651         break;
652       case 'f':
653         decoded += '\f';
654         break;
655       case 'n':
656         decoded += '\n';
657         break;
658       case 'r':
659         decoded += '\r';
660         break;
661       case 't':
662         decoded += '\t';
663         break;
664       case 'u': {
665         unsigned int unicode;
666         if (!decodeUnicodeCodePoint(token, current, end, unicode))
667           return false;
668         decoded += codePointToUTF8(unicode);
669       } break;
670       default:
671         return addError("Bad escape sequence in string", token, current);
672       }
673     } else {
674       decoded += c;
675     }
676   }
677   return true;
678 }
679 
decodeUnicodeCodePoint(Token & token,Location & current,Location end,unsigned int & unicode)680 bool Reader::decodeUnicodeCodePoint(Token& token, Location& current,
681                                     Location end, unsigned int& unicode) {
682 
683   if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
684     return false;
685   if (unicode >= 0xD800 && unicode <= 0xDBFF) {
686     // surrogate pairs
687     if (end - current < 6)
688       return addError(
689           "additional six characters expected to parse unicode surrogate pair.",
690           token, current);
691     if (*(current++) == '\\' && *(current++) == 'u') {
692       unsigned int surrogatePair;
693       if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
694         unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
695       } else
696         return false;
697     } else
698       return addError("expecting another \\u token to begin the second half of "
699                       "a unicode surrogate pair",
700                       token, current);
701   }
702   return true;
703 }
704 
decodeUnicodeEscapeSequence(Token & token,Location & current,Location end,unsigned int & ret_unicode)705 bool Reader::decodeUnicodeEscapeSequence(Token& token, Location& current,
706                                          Location end,
707                                          unsigned int& ret_unicode) {
708   if (end - current < 4)
709     return addError(
710         "Bad unicode escape sequence in string: four digits expected.", token,
711         current);
712   int unicode = 0;
713   for (int index = 0; index < 4; ++index) {
714     Char c = *current++;
715     unicode *= 16;
716     if (c >= '0' && c <= '9')
717       unicode += c - '0';
718     else if (c >= 'a' && c <= 'f')
719       unicode += c - 'a' + 10;
720     else if (c >= 'A' && c <= 'F')
721       unicode += c - 'A' + 10;
722     else
723       return addError(
724           "Bad unicode escape sequence in string: hexadecimal digit expected.",
725           token, current);
726   }
727   ret_unicode = static_cast<unsigned int>(unicode);
728   return true;
729 }
730 
addError(const String & message,Token & token,Location extra)731 bool Reader::addError(const String& message, Token& token, Location extra) {
732   ErrorInfo info;
733   info.token_ = token;
734   info.message_ = message;
735   info.extra_ = extra;
736   errors_.push_back(info);
737   return false;
738 }
739 
recoverFromError(TokenType skipUntilToken)740 bool Reader::recoverFromError(TokenType skipUntilToken) {
741   size_t const errorCount = errors_.size();
742   Token skip;
743   for (;;) {
744     if (!readToken(skip))
745       errors_.resize(errorCount); // discard errors caused by recovery
746     if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
747       break;
748   }
749   errors_.resize(errorCount);
750   return false;
751 }
752 
addErrorAndRecover(const String & message,Token & token,TokenType skipUntilToken)753 bool Reader::addErrorAndRecover(const String& message, Token& token,
754                                 TokenType skipUntilToken) {
755   addError(message, token);
756   return recoverFromError(skipUntilToken);
757 }
758 
currentValue()759 Value& Reader::currentValue() { return *(nodes_.top()); }
760 
getNextChar()761 Reader::Char Reader::getNextChar() {
762   if (current_ == end_)
763     return 0;
764   return *current_++;
765 }
766 
getLocationLineAndColumn(Location location,int & line,int & column) const767 void Reader::getLocationLineAndColumn(Location location, int& line,
768                                       int& column) const {
769   Location current = begin_;
770   Location lastLineStart = current;
771   line = 0;
772   while (current < location && current != end_) {
773     Char c = *current++;
774     if (c == '\r') {
775       // Add boundary check to avoid cross the border
776       if (current == end_) {
777         break;
778       }
779       if (*current == '\n')
780         ++current;
781       lastLineStart = current;
782       ++line;
783     } else if (c == '\n') {
784       lastLineStart = current;
785       ++line;
786     }
787   }
788   // column & line start at 1
789   column = int(location - lastLineStart) + 1;
790   ++line;
791 }
792 
getLocationLineAndColumn(Location location) const793 String Reader::getLocationLineAndColumn(Location location) const {
794   int line, column;
795   getLocationLineAndColumn(location, line, column);
796   char buffer[18 + 16 + 16 + 1];
797   jsoncpp_snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
798   return buffer;
799 }
800 
801 // Deprecated. Preserved for backward compatibility
getFormatedErrorMessages() const802 String Reader::getFormatedErrorMessages() const {
803   return getFormattedErrorMessages();
804 }
805 
getFormattedErrorMessages() const806 String Reader::getFormattedErrorMessages() const {
807   String formattedMessage;
808   for (Errors::const_iterator itError = errors_.begin();
809        itError != errors_.end(); ++itError) {
810     const ErrorInfo& error = *itError;
811     formattedMessage +=
812         "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
813     formattedMessage += "  " + error.message_ + "\n";
814     if (error.extra_)
815       formattedMessage +=
816           "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
817   }
818   return formattedMessage;
819 }
820 
getStructuredErrors() const821 std::vector<Reader::StructuredError> Reader::getStructuredErrors() const {
822   std::vector<Reader::StructuredError> allErrors;
823   for (Errors::const_iterator itError = errors_.begin();
824        itError != errors_.end(); ++itError) {
825     const ErrorInfo& error = *itError;
826     Reader::StructuredError structured;
827     structured.offset_start = error.token_.start_ - begin_;
828     structured.offset_limit = error.token_.end_ - begin_;
829     structured.message = error.message_;
830     allErrors.push_back(structured);
831   }
832   return allErrors;
833 }
834 
pushError(const Value & value,const String & message)835 bool Reader::pushError(const Value& value, const String& message) {
836   ptrdiff_t const length = end_ - begin_;
837   if (value.getOffsetStart() > length || value.getOffsetLimit() > length)
838     return false;
839   Token token;
840   token.type_ = tokenError;
841   token.start_ = begin_ + value.getOffsetStart();
842   token.end_ = begin_ + value.getOffsetLimit();
843   ErrorInfo info;
844   info.token_ = token;
845   info.message_ = message;
846   info.extra_ = JSONCPP_NULL;
847   errors_.push_back(info);
848   return true;
849 }
850 
pushError(const Value & value,const String & message,const Value & extra)851 bool Reader::pushError(const Value& value, const String& message,
852                        const Value& extra) {
853   ptrdiff_t const length = end_ - begin_;
854   if (value.getOffsetStart() > length || value.getOffsetLimit() > length ||
855       extra.getOffsetLimit() > length)
856     return false;
857   Token token;
858   token.type_ = tokenError;
859   token.start_ = begin_ + value.getOffsetStart();
860   token.end_ = begin_ + value.getOffsetLimit();
861   ErrorInfo info;
862   info.token_ = token;
863   info.message_ = message;
864   info.extra_ = begin_ + extra.getOffsetStart();
865   errors_.push_back(info);
866   return true;
867 }
868 
good() const869 bool Reader::good() const { return errors_.empty(); }
870 
871 // Originally copied from the Features class (now deprecated), used internally
872 // for features implementation.
// Plain bundle of parser option flags plus a stack limit. The class declares
// no constructor, so a value-initialized instance has every flag false and
// stackLimit_ equal to 0.
class OurFeatures {
public:
  bool allowComments_;
  bool allowTrailingCommas_;
  bool strictRoot_;
  bool allowDroppedNullPlaceholders_;
  bool allowNumericKeys_;
  bool allowSingleQuotes_;
  bool failIfExtra_;
  bool rejectDupKeys_;
  bool allowSpecialFloats_;
  bool skipBom_;
  size_t stackLimit_;

  // Factory; declared here and defined out of line.
  static OurFeatures all();
}; // OurFeatures
888 
all()889 OurFeatures OurFeatures::all() { return OurFeatures(); }
890 
891 // Implementation of class Reader
892 // ////////////////////////////////
893 
894 // Originally copied from the Reader class (now deprecated), used internally
895 // for implementing JSON reading.
896 class OurReader {
897 public:
898   typedef char Char;
899   typedef const Char* Location;
900   struct StructuredError {
901     ptrdiff_t offset_start;
902     ptrdiff_t offset_limit;
903     String message;
904   };
905 
906   JSONCPP_OP_EXPLICIT OurReader(OurFeatures const& features);
907   bool parse(const char* beginDoc, const char* endDoc, Value& root,
908              bool collectComments = true);
909   String getFormattedErrorMessages() const;
910   std::vector<StructuredError> getStructuredErrors() const;
911 
912 private:
913   OurReader(OurReader const&);      // no impl
914   void operator=(OurReader const&); // no impl
915 
916   enum TokenType {
917     tokenEndOfStream = 0,
918     tokenObjectBegin,
919     tokenObjectEnd,
920     tokenArrayBegin,
921     tokenArrayEnd,
922     tokenString,
923     tokenNumber,
924     tokenTrue,
925     tokenFalse,
926     tokenNull,
927     tokenNaN,
928     tokenPosInf,
929     tokenNegInf,
930     tokenArraySeparator,
931     tokenMemberSeparator,
932     tokenComment,
933     tokenError
934   };
935 
936   class Token {
937   public:
938     TokenType type_;
939     Location start_;
940     Location end_;
941   };
942 
943   class ErrorInfo {
944   public:
945     Token token_;
946     String message_;
947     Location extra_;
948   };
949 
950   typedef std::deque<ErrorInfo> Errors;
951 
952   bool readToken(Token& token);
953   void skipSpaces();
954   void skipBom(bool skipBom);
955   bool match(const Char* pattern, int patternLength);
956   bool readComment();
957   bool readCStyleComment(bool* containsNewLineResult);
958   bool readCppStyleComment();
959   bool readString();
960   bool readStringSingleQuote();
961   bool readNumber(bool checkInf);
962   bool readValue();
963   bool readObject(Token& token);
964   bool readArray(Token& token);
965   bool decodeNumber(Token& token);
966   bool decodeNumber(Token& token, Value& decoded);
967   bool decodeString(Token& token);
968   bool decodeString(Token& token, String& decoded);
969   bool decodeDouble(Token& token);
970   bool decodeDouble(Token& token, Value& decoded);
971   bool decodeUnicodeCodePoint(Token& token, Location& current, Location end,
972                               unsigned int& unicode);
973   bool decodeUnicodeEscapeSequence(Token& token, Location& current,
974                                    Location end, unsigned int& unicode);
975   bool addError(const String& message, Token& token,
976                 Location extra = JSONCPP_NULL);
977   bool recoverFromError(TokenType skipUntilToken);
978   bool addErrorAndRecover(const String& message, Token& token,
979                           TokenType skipUntilToken);
980   void skipUntilSpace();
981   Value& currentValue();
982   Char getNextChar();
983   void getLocationLineAndColumn(Location location, int& line,
984                                 int& column) const;
985   String getLocationLineAndColumn(Location location) const;
986   void addComment(Location begin, Location end, CommentPlacement placement);
987   void skipCommentTokens(Token& token);
988 
989   static String normalizeEOL(Location begin, Location end);
990   static bool containsNewLine(Location begin, Location end);
991 
992   typedef std::stack<Value*> Nodes;
993 
994   Nodes nodes_;
995   Errors errors_;
996   String document_;
997   Location begin_;
998   Location end_;
999   Location current_;
1000   Location lastValueEnd_;
1001   Value* lastValue_;
1002   bool lastValueHasAComment_;
1003   String commentsBefore_;
1004 
1005   OurFeatures const features_;
1006   bool collectComments_;
1007 }; // OurReader
1008 
// Complete copy of the Reader implementation, adapted for OurReader.
1010 
containsNewLine(OurReader::Location begin,OurReader::Location end)1011 bool OurReader::containsNewLine(OurReader::Location begin,
1012                                 OurReader::Location end) {
1013   for (; begin < end; ++begin)
1014     if (*begin == '\n' || *begin == '\r')
1015       return true;
1016   return false;
1017 }
1018 
OurReader(OurFeatures const & features)1019 OurReader::OurReader(OurFeatures const& features)
1020     : errors_(), document_(), begin_(JSONCPP_NULL), end_(JSONCPP_NULL),
1021       current_(JSONCPP_NULL), lastValueEnd_(JSONCPP_NULL),
1022       lastValue_(JSONCPP_NULL), lastValueHasAComment_(false), commentsBefore_(),
1023       features_(features), collectComments_(false) {}
1024 
parse(const char * beginDoc,const char * endDoc,Value & root,bool collectComments)1025 bool OurReader::parse(const char* beginDoc, const char* endDoc, Value& root,
1026                       bool collectComments) {
1027   if (!features_.allowComments_) {
1028     collectComments = false;
1029   }
1030 
1031   begin_ = beginDoc;
1032   end_ = endDoc;
1033   collectComments_ = collectComments;
1034   current_ = begin_;
1035   lastValueEnd_ = JSONCPP_NULL;
1036   lastValue_ = JSONCPP_NULL;
1037   commentsBefore_.clear();
1038   errors_.clear();
1039   while (!nodes_.empty())
1040     nodes_.pop();
1041   nodes_.push(&root);
1042 
1043   // skip byte order mark if it exists at the beginning of the UTF-8 text.
1044   skipBom(features_.skipBom_);
1045   bool successful = readValue();
1046   nodes_.pop();
1047   Token token;
1048   skipCommentTokens(token);
1049   if (features_.failIfExtra_ && (token.type_ != tokenEndOfStream)) {
1050     addError("Extra non-whitespace after JSON value.", token);
1051     return false;
1052   }
1053   if (collectComments_ && !commentsBefore_.empty())
1054     root.setComment(commentsBefore_, commentAfter);
1055   if (features_.strictRoot_) {
1056     if (!root.isArray() && !root.isObject()) {
1057       // Set error location to start of doc, ideally should be first token found
1058       // in doc
1059       token.type_ = tokenError;
1060       token.start_ = beginDoc;
1061       token.end_ = endDoc;
1062       addError(
1063           "A valid JSON document must be either an array or an object value.",
1064           token);
1065       return false;
1066     }
1067   }
1068   return successful;
1069 }
1070 
readValue()1071 bool OurReader::readValue() {
1072   //  To preserve the old behaviour we cast size_t to int.
1073   if (nodes_.size() > features_.stackLimit_)
1074     throwRuntimeError("Exceeded stackLimit in readValue().");
1075   Token token;
1076   skipCommentTokens(token);
1077   bool successful = true;
1078 
1079   if (collectComments_ && !commentsBefore_.empty()) {
1080     currentValue().setComment(commentsBefore_, commentBefore);
1081     commentsBefore_.clear();
1082   }
1083 
1084   switch (token.type_) {
1085   case tokenObjectBegin:
1086     successful = readObject(token);
1087     currentValue().setOffsetLimit(current_ - begin_);
1088     break;
1089   case tokenArrayBegin:
1090     successful = readArray(token);
1091     currentValue().setOffsetLimit(current_ - begin_);
1092     break;
1093   case tokenNumber:
1094     successful = decodeNumber(token);
1095     break;
1096   case tokenString:
1097     successful = decodeString(token);
1098     break;
1099   case tokenTrue: {
1100     Value v(true);
1101     currentValue().swapPayload(v);
1102     currentValue().setOffsetStart(token.start_ - begin_);
1103     currentValue().setOffsetLimit(token.end_ - begin_);
1104   } break;
1105   case tokenFalse: {
1106     Value v(false);
1107     currentValue().swapPayload(v);
1108     currentValue().setOffsetStart(token.start_ - begin_);
1109     currentValue().setOffsetLimit(token.end_ - begin_);
1110   } break;
1111   case tokenNull: {
1112     Value v;
1113     currentValue().swapPayload(v);
1114     currentValue().setOffsetStart(token.start_ - begin_);
1115     currentValue().setOffsetLimit(token.end_ - begin_);
1116   } break;
1117   case tokenNaN: {
1118     Value v(std::numeric_limits<double>::quiet_NaN());
1119     currentValue().swapPayload(v);
1120     currentValue().setOffsetStart(token.start_ - begin_);
1121     currentValue().setOffsetLimit(token.end_ - begin_);
1122   } break;
1123   case tokenPosInf: {
1124     Value v(std::numeric_limits<double>::infinity());
1125     currentValue().swapPayload(v);
1126     currentValue().setOffsetStart(token.start_ - begin_);
1127     currentValue().setOffsetLimit(token.end_ - begin_);
1128   } break;
1129   case tokenNegInf: {
1130     Value v(-std::numeric_limits<double>::infinity());
1131     currentValue().swapPayload(v);
1132     currentValue().setOffsetStart(token.start_ - begin_);
1133     currentValue().setOffsetLimit(token.end_ - begin_);
1134   } break;
1135   case tokenArraySeparator:
1136   case tokenObjectEnd:
1137   case tokenArrayEnd:
1138     if (features_.allowDroppedNullPlaceholders_) {
1139       // "Un-read" the current token and mark the current value as a null
1140       // token.
1141       current_--;
1142       Value v;
1143       currentValue().swapPayload(v);
1144       currentValue().setOffsetStart(current_ - begin_ - 1);
1145       currentValue().setOffsetLimit(current_ - begin_);
1146       break;
1147     } // else, fall through ...
1148   default:
1149     currentValue().setOffsetStart(token.start_ - begin_);
1150     currentValue().setOffsetLimit(token.end_ - begin_);
1151     return addError("Syntax error: value, object or array expected.", token);
1152   }
1153 
1154   if (collectComments_) {
1155     lastValueEnd_ = current_;
1156     lastValueHasAComment_ = false;
1157     lastValue_ = &currentValue();
1158   }
1159 
1160   return successful;
1161 }
1162 
skipCommentTokens(Token & token)1163 void OurReader::skipCommentTokens(Token& token) {
1164   if (features_.allowComments_) {
1165     do {
1166       readToken(token);
1167     } while (token.type_ == tokenComment);
1168   } else {
1169     readToken(token);
1170   }
1171 }
1172 
readToken(Token & token)1173 bool OurReader::readToken(Token& token) {
1174   skipSpaces();
1175   token.start_ = current_;
1176   Char c = getNextChar();
1177   bool ok = true;
1178   switch (c) {
1179   case '{':
1180     token.type_ = tokenObjectBegin;
1181     break;
1182   case '}':
1183     token.type_ = tokenObjectEnd;
1184     break;
1185   case '[':
1186     token.type_ = tokenArrayBegin;
1187     break;
1188   case ']':
1189     token.type_ = tokenArrayEnd;
1190     break;
1191   case '"':
1192     token.type_ = tokenString;
1193     ok = readString();
1194     break;
1195   case '\'':
1196     if (features_.allowSingleQuotes_) {
1197       token.type_ = tokenString;
1198       ok = readStringSingleQuote();
1199       break;
1200     } // else fall through
1201   case '/':
1202     token.type_ = tokenComment;
1203     ok = readComment();
1204     break;
1205   case '0':
1206   case '1':
1207   case '2':
1208   case '3':
1209   case '4':
1210   case '5':
1211   case '6':
1212   case '7':
1213   case '8':
1214   case '9':
1215     token.type_ = tokenNumber;
1216     readNumber(false);
1217     break;
1218   case '-':
1219     if (readNumber(true)) {
1220       token.type_ = tokenNumber;
1221     } else {
1222       token.type_ = tokenNegInf;
1223       ok = features_.allowSpecialFloats_ && match("nfinity", 7);
1224     }
1225     break;
1226   case '+':
1227     if (readNumber(true)) {
1228       token.type_ = tokenNumber;
1229     } else {
1230       token.type_ = tokenPosInf;
1231       ok = features_.allowSpecialFloats_ && match("nfinity", 7);
1232     }
1233     break;
1234   case 't':
1235     token.type_ = tokenTrue;
1236     ok = match("rue", 3);
1237     break;
1238   case 'f':
1239     token.type_ = tokenFalse;
1240     ok = match("alse", 4);
1241     break;
1242   case 'n':
1243     token.type_ = tokenNull;
1244     ok = match("ull", 3);
1245     break;
1246   case 'N':
1247     if (features_.allowSpecialFloats_) {
1248       token.type_ = tokenNaN;
1249       ok = match("aN", 2);
1250     } else {
1251       ok = false;
1252     }
1253     break;
1254   case 'I':
1255     if (features_.allowSpecialFloats_) {
1256       token.type_ = tokenPosInf;
1257       ok = match("nfinity", 7);
1258     } else {
1259       ok = false;
1260     }
1261     break;
1262   case ',':
1263     token.type_ = tokenArraySeparator;
1264     break;
1265   case ':':
1266     token.type_ = tokenMemberSeparator;
1267     break;
1268   case 0:
1269     token.type_ = tokenEndOfStream;
1270     break;
1271   default:
1272     ok = false;
1273     break;
1274   }
1275   if (!ok)
1276     token.type_ = tokenError;
1277   token.end_ = current_;
1278   return ok;
1279 }
1280 
skipSpaces()1281 void OurReader::skipSpaces() {
1282   while (current_ != end_) {
1283     Char c = *current_;
1284     if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
1285       ++current_;
1286     else
1287       break;
1288   }
1289 }
1290 
skipBom(bool skipBom)1291 void OurReader::skipBom(bool skipBom) {
1292   // The default behavior is to skip BOM.
1293   if (skipBom) {
1294     if ((end_ - begin_) >= 3 && strncmp(begin_, "\xEF\xBB\xBF", 3) == 0) {
1295       begin_ += 3;
1296       current_ = begin_;
1297     }
1298   }
1299 }
1300 
match(const Char * pattern,int patternLength)1301 bool OurReader::match(const Char* pattern, int patternLength) {
1302   if (end_ - current_ < patternLength)
1303     return false;
1304   int index = patternLength;
1305   while (index--)
1306     if (current_[index] != pattern[index])
1307       return false;
1308   current_ += patternLength;
1309   return true;
1310 }
1311 
readComment()1312 bool OurReader::readComment() {
1313   const Location commentBegin = current_ - 1;
1314   const Char c = getNextChar();
1315   bool successful = false;
1316   bool cStyleWithEmbeddedNewline = false;
1317 
1318   const bool isCStyleComment = (c == '*');
1319   const bool isCppStyleComment = (c == '/');
1320   if (isCStyleComment) {
1321     successful = readCStyleComment(&cStyleWithEmbeddedNewline);
1322   } else if (isCppStyleComment) {
1323     successful = readCppStyleComment();
1324   }
1325 
1326   if (!successful)
1327     return false;
1328 
1329   if (collectComments_) {
1330     CommentPlacement placement = commentBefore;
1331 
1332     if (!lastValueHasAComment_) {
1333       if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
1334         if (isCppStyleComment || !cStyleWithEmbeddedNewline) {
1335           placement = commentAfterOnSameLine;
1336           lastValueHasAComment_ = true;
1337         }
1338       }
1339     }
1340 
1341     addComment(commentBegin, current_, placement);
1342   }
1343   return true;
1344 }
1345 
normalizeEOL(OurReader::Location begin,OurReader::Location end)1346 String OurReader::normalizeEOL(OurReader::Location begin,
1347                                OurReader::Location end) {
1348   String normalized;
1349   normalized.reserve(static_cast<size_t>(end - begin));
1350   OurReader::Location current = begin;
1351   while (current != end) {
1352     char c = *current++;
1353     if (c == '\r') {
1354       if (current != end && *current == '\n')
1355         // convert dos EOL
1356         ++current;
1357       // convert Mac EOL
1358       normalized += '\n';
1359     } else {
1360       normalized += c;
1361     }
1362   }
1363   return normalized;
1364 }
1365 
addComment(Location begin,Location end,CommentPlacement placement)1366 void OurReader::addComment(Location begin, Location end,
1367                            CommentPlacement placement) {
1368   assert(collectComments_);
1369   const String& normalized = normalizeEOL(begin, end);
1370   if (placement == commentAfterOnSameLine) {
1371     assert(lastValue_ != JSONCPP_NULL);
1372     lastValue_->setComment(normalized, placement);
1373   } else {
1374     commentsBefore_ += normalized;
1375   }
1376 }
1377 
readCStyleComment(bool * containsNewLineResult)1378 bool OurReader::readCStyleComment(bool* containsNewLineResult) {
1379   *containsNewLineResult = false;
1380 
1381   while ((current_ + 1) < end_) {
1382     Char c = getNextChar();
1383     if (c == '*' && *current_ == '/')
1384       break;
1385     if (c == '\n')
1386       *containsNewLineResult = true;
1387   }
1388 
1389   return getNextChar() == '/';
1390 }
1391 
readCppStyleComment()1392 bool OurReader::readCppStyleComment() {
1393   while (current_ != end_) {
1394     Char c = getNextChar();
1395     if (c == '\n')
1396       break;
1397     if (c == '\r') {
1398       // Consume DOS EOL. It will be normalized in addComment.
1399       if (current_ != end_ && *current_ == '\n')
1400         getNextChar();
1401       // Break on Moc OS 9 EOL.
1402       break;
1403     }
1404   }
1405   return true;
1406 }
1407 
readNumber(bool checkInf)1408 bool OurReader::readNumber(bool checkInf) {
1409   Location p = current_;
1410   if (checkInf && p != end_ && *p == 'I') {
1411     current_ = ++p;
1412     return false;
1413   }
1414   char c = '0'; // stopgap for already consumed character
1415   // integral part
1416   while (c >= '0' && c <= '9')
1417     c = (current_ = p) < end_ ? *p++ : '\0';
1418   // fractional part
1419   if (c == '.') {
1420     c = (current_ = p) < end_ ? *p++ : '\0';
1421     while (c >= '0' && c <= '9')
1422       c = (current_ = p) < end_ ? *p++ : '\0';
1423   }
1424   // exponential part
1425   if (c == 'e' || c == 'E') {
1426     c = (current_ = p) < end_ ? *p++ : '\0';
1427     if (c == '+' || c == '-')
1428       c = (current_ = p) < end_ ? *p++ : '\0';
1429     while (c >= '0' && c <= '9')
1430       c = (current_ = p) < end_ ? *p++ : '\0';
1431   }
1432   return true;
1433 }
readString()1434 bool OurReader::readString() {
1435   Char c = 0;
1436   while (current_ != end_) {
1437     c = getNextChar();
1438     if (c == '\\')
1439       getNextChar();
1440     else if (c == '"')
1441       break;
1442   }
1443   return c == '"';
1444 }
1445 
readStringSingleQuote()1446 bool OurReader::readStringSingleQuote() {
1447   Char c = 0;
1448   while (current_ != end_) {
1449     c = getNextChar();
1450     if (c == '\\')
1451       getNextChar();
1452     else if (c == '\'')
1453       break;
1454   }
1455   return c == '\'';
1456 }
1457 
readObject(Token & token)1458 bool OurReader::readObject(Token& token) {
1459   Token tokenName;
1460   String name;
1461   Value init(objectValue);
1462   currentValue().swapPayload(init);
1463   currentValue().setOffsetStart(token.start_ - begin_);
1464   while (readToken(tokenName)) {
1465     bool initialTokenOk = true;
1466     while (tokenName.type_ == tokenComment && initialTokenOk)
1467       initialTokenOk = readToken(tokenName);
1468     if (!initialTokenOk)
1469       break;
1470     if (tokenName.type_ == tokenObjectEnd &&
1471         (name.empty() ||
1472          features_.allowTrailingCommas_)) // empty object or trailing comma
1473       return true;
1474     name.clear();
1475     if (tokenName.type_ == tokenString) {
1476       if (!decodeString(tokenName, name))
1477         return recoverFromError(tokenObjectEnd);
1478     } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
1479       Value numberName;
1480       if (!decodeNumber(tokenName, numberName))
1481         return recoverFromError(tokenObjectEnd);
1482       name = numberName.asString();
1483     } else {
1484       break;
1485     }
1486     if (name.length() >= (1U << 30))
1487       throwRuntimeError("keylength >= 2^30");
1488     if (features_.rejectDupKeys_ && currentValue().isMember(name)) {
1489       String msg = "Duplicate key: '" + name + "'";
1490       return addErrorAndRecover(msg, tokenName, tokenObjectEnd);
1491     }
1492 
1493     Token colon;
1494     if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
1495       return addErrorAndRecover("Missing ':' after object member name", colon,
1496                                 tokenObjectEnd);
1497     }
1498     Value& value = currentValue()[name];
1499     nodes_.push(&value);
1500     bool ok = readValue();
1501     nodes_.pop();
1502     if (!ok) // error already set
1503       return recoverFromError(tokenObjectEnd);
1504 
1505     Token comma;
1506     if (!readToken(comma) ||
1507         (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
1508          comma.type_ != tokenComment)) {
1509       return addErrorAndRecover("Missing ',' or '}' in object declaration",
1510                                 comma, tokenObjectEnd);
1511     }
1512     bool finalizeTokenOk = true;
1513     while (comma.type_ == tokenComment && finalizeTokenOk)
1514       finalizeTokenOk = readToken(comma);
1515     if (comma.type_ == tokenObjectEnd)
1516       return true;
1517   }
1518   return addErrorAndRecover("Missing '}' or object member name", tokenName,
1519                             tokenObjectEnd);
1520 }
1521 
readArray(Token & token)1522 bool OurReader::readArray(Token& token) {
1523   Value init(arrayValue);
1524   currentValue().swapPayload(init);
1525   currentValue().setOffsetStart(token.start_ - begin_);
1526   int index = 0;
1527   for (;;) {
1528     skipSpaces();
1529     if (current_ != end_ && *current_ == ']' &&
1530         (index == 0 ||
1531          (features_.allowTrailingCommas_ &&
1532           !features_.allowDroppedNullPlaceholders_))) // empty array or trailing
1533                                                       // comma
1534     {
1535       Token endArray;
1536       readToken(endArray);
1537       return true;
1538     }
1539     Value& value = currentValue()[index++];
1540     nodes_.push(&value);
1541     bool ok = readValue();
1542     nodes_.pop();
1543     if (!ok) // error already set
1544       return recoverFromError(tokenArrayEnd);
1545 
1546     Token currentToken;
1547     // Accept Comment after last item in the array.
1548     ok = readToken(currentToken);
1549     while (currentToken.type_ == tokenComment && ok) {
1550       ok = readToken(currentToken);
1551     }
1552     bool badTokenType = (currentToken.type_ != tokenArraySeparator &&
1553                          currentToken.type_ != tokenArrayEnd);
1554     if (!ok || badTokenType) {
1555       return addErrorAndRecover("Missing ',' or ']' in array declaration",
1556                                 currentToken, tokenArrayEnd);
1557     }
1558     if (currentToken.type_ == tokenArrayEnd)
1559       break;
1560   }
1561   return true;
1562 }
1563 
decodeNumber(Token & token)1564 bool OurReader::decodeNumber(Token& token) {
1565   Value decoded;
1566   if (!decodeNumber(token, decoded))
1567     return false;
1568   currentValue().swapPayload(decoded);
1569   currentValue().setOffsetStart(token.start_ - begin_);
1570   currentValue().setOffsetLimit(token.end_ - begin_);
1571   return true;
1572 }
1573 
decodeNumber(Token & token,Value & decoded)1574 bool OurReader::decodeNumber(Token& token, Value& decoded) {
1575   // Attempts to parse the number as an integer. If the number is
1576   // larger than the maximum supported value of an integer then
1577   // we decode the number as a double.
1578   Location current = token.start_;
1579   const bool isNegative = *current == '-';
1580   if (isNegative) {
1581     ++current;
1582   }
1583 
1584   // We assume we can represent the largest and smallest integer types as
1585   // unsigned integers with separate sign. This is only true if they can fit
1586   // into an unsigned integer.
1587   JSONCPP_STATIC_ASSERT(LargestUInt(Value::maxLargestInt) <=
1588                             Value::maxLargestUInt,
1589                         "Int must be smaller than Uint");
1590   // We need to convert minLargestInt into a positive number. The easiest way
1591   // to do this conversion is to assume our "threshold" value of minLargestInt
1592   // divided by 10 can fit in maxLargestInt when absolute valued. This should
1593   // be a safe assumption.
1594   JSONCPP_STATIC_ASSERT(
1595       Value::minLargestInt <= -Value::maxLargestInt,
1596       "The absolute value of minLargestInt must ve greater than or"
1597       "equal to maxLargestInt");
1598 
1599   JSONCPP_STATIC_ASSERT(
1600       Value::minLargestInt / 10 >= -Value::maxLargestInt,
1601       "The absolute value of minLargestInt must be only 1 magnitude"
1602       "larger than maxLargestInt");
1603 
1604   static JSONCPP_CONST Value::LargestUInt positive_threshold =
1605       Value::maxLargestUInt / 10;
1606   static JSONCPP_CONST Value::UInt positive_last_digit =
1607       Value::maxLargestUInt % 10;
1608 
1609   // For the negative values, we have to be more careful. Since typically
1610   // -Value::minLargestInt will cause an overflow, we first divide by 10 and
1611   // then take the inverse. This assumes that minLargestInt is only a single
1612   // power of 10 different in magnitude, which we check above. For the last
1613   // digit, we take the modulus before negating for the same reason.
1614   static JSONCPP_CONST Value::LargestUInt negative_threshold =
1615       Value::LargestUInt(-(Value::minLargestInt / 10));
1616   static JSONCPP_CONST Value::UInt negative_last_digit =
1617       Value::UInt(-(Value::minLargestInt % 10));
1618 
1619   const Value::LargestUInt threshold =
1620       isNegative ? negative_threshold : positive_threshold;
1621   const Value::UInt max_last_digit =
1622       isNegative ? negative_last_digit : positive_last_digit;
1623 
1624   Value::LargestUInt value = 0;
1625   while (current < token.end_) {
1626     Char c = *current++;
1627     if (c < '0' || c > '9')
1628       return decodeDouble(token, decoded);
1629 
1630     const Value::UInt digit(static_cast<Value::UInt>(c - '0'));
1631     if (value >= threshold) {
1632       // We've hit or exceeded the max value divided by 10 (rounded down). If
1633       // a) we've only just touched the limit, meaing value == threshold,
1634       // b) this is the last digit, or
1635       // c) it's small enough to fit in that rounding delta, we're okay.
1636       // Otherwise treat this number as a double to avoid overflow.
1637       if (value > threshold || current != token.end_ ||
1638           digit > max_last_digit) {
1639         return decodeDouble(token, decoded);
1640       }
1641     }
1642     value = value * 10 + digit;
1643   }
1644 
1645   if (isNegative) {
1646     // We use the same magnitude assumption here, just in case.
1647     const Value::UInt last_digit = static_cast<Value::UInt>(value % 10);
1648     decoded = -Value::LargestInt(value / 10) * 10 - last_digit;
1649   } else if (value <= Value::LargestUInt(Value::maxLargestInt)) {
1650     decoded = Value::LargestInt(value);
1651   } else {
1652     decoded = value;
1653   }
1654 
1655   return true;
1656 }
1657 
decodeDouble(Token & token)1658 bool OurReader::decodeDouble(Token& token) {
1659   Value decoded;
1660   if (!decodeDouble(token, decoded))
1661     return false;
1662   currentValue().swapPayload(decoded);
1663   currentValue().setOffsetStart(token.start_ - begin_);
1664   currentValue().setOffsetLimit(token.end_ - begin_);
1665   return true;
1666 }
1667 
decodeDouble(Token & token,Value & decoded)1668 bool OurReader::decodeDouble(Token& token, Value& decoded) {
1669   double value = 0;
1670   const String buffer(token.start_, token.end_);
1671   IStringStream is(buffer);
1672   if (!(is >> value)) {
1673     // the value could be lower than numeric_limits<double>::min(), in this situtation we should return the value with the gurantee
1674     // of conversion which has been performed and no occurances of range error.
1675     if ((value > 0 && value < std::numeric_limits<double>::min()) || (value < 0 && value > -std::numeric_limits<double>::min())) {
1676       decoded = value;
1677       return true;
1678     }
1679     return addError(
1680         "'" + String(token.start_, token.end_) + "' is not a number.", token);
1681   }
1682   decoded = value;
1683   return true;
1684 }
1685 
decodeString(Token & token)1686 bool OurReader::decodeString(Token& token) {
1687   String decoded_string;
1688   if (!decodeString(token, decoded_string))
1689     return false;
1690   Value decoded(decoded_string);
1691   currentValue().swapPayload(decoded);
1692   currentValue().setOffsetStart(token.start_ - begin_);
1693   currentValue().setOffsetLimit(token.end_ - begin_);
1694   return true;
1695 }
1696 
decodeString(Token & token,String & decoded)1697 bool OurReader::decodeString(Token& token, String& decoded) {
1698   decoded.reserve(static_cast<size_t>(token.end_ - token.start_ - 2));
1699   Location current = token.start_ + 1; // skip '"'
1700   Location end = token.end_ - 1;       // do not include '"'
1701   while (current != end) {
1702     Char c = *current++;
1703     if (c == '"')
1704       break;
1705     if (c == '\\') {
1706       if (current == end)
1707         return addError("Empty escape sequence in string", token, current);
1708       Char escape = *current++;
1709       switch (escape) {
1710       case '"':
1711         decoded += '"';
1712         break;
1713       case '/':
1714         decoded += '/';
1715         break;
1716       case '\\':
1717         decoded += '\\';
1718         break;
1719       case 'b':
1720         decoded += '\b';
1721         break;
1722       case 'f':
1723         decoded += '\f';
1724         break;
1725       case 'n':
1726         decoded += '\n';
1727         break;
1728       case 'r':
1729         decoded += '\r';
1730         break;
1731       case 't':
1732         decoded += '\t';
1733         break;
1734       case 'u': {
1735         unsigned int unicode;
1736         if (!decodeUnicodeCodePoint(token, current, end, unicode))
1737           return false;
1738         decoded += codePointToUTF8(unicode);
1739       } break;
1740       default:
1741         return addError("Bad escape sequence in string", token, current);
1742       }
1743     } else {
1744       decoded += c;
1745     }
1746   }
1747   return true;
1748 }
1749 
decodeUnicodeCodePoint(Token & token,Location & current,Location end,unsigned int & unicode)1750 bool OurReader::decodeUnicodeCodePoint(Token& token, Location& current,
1751                                        Location end, unsigned int& unicode) {
1752 
1753   if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
1754     return false;
1755   if (unicode >= 0xD800 && unicode <= 0xDBFF) {
1756     // surrogate pairs
1757     if (end - current < 6)
1758       return addError(
1759           "additional six characters expected to parse unicode surrogate pair.",
1760           token, current);
1761     if (*(current++) == '\\' && *(current++) == 'u') {
1762       unsigned int surrogatePair;
1763       if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
1764         unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
1765       } else
1766         return false;
1767     } else
1768       return addError("expecting another \\u token to begin the second half of "
1769                       "a unicode surrogate pair",
1770                       token, current);
1771   }
1772   return true;
1773 }
1774 
decodeUnicodeEscapeSequence(Token & token,Location & current,Location end,unsigned int & ret_unicode)1775 bool OurReader::decodeUnicodeEscapeSequence(Token& token, Location& current,
1776                                             Location end,
1777                                             unsigned int& ret_unicode) {
1778   if (end - current < 4)
1779     return addError(
1780         "Bad unicode escape sequence in string: four digits expected.", token,
1781         current);
1782   int unicode = 0;
1783   for (int index = 0; index < 4; ++index) {
1784     Char c = *current++;
1785     unicode *= 16;
1786     if (c >= '0' && c <= '9')
1787       unicode += c - '0';
1788     else if (c >= 'a' && c <= 'f')
1789       unicode += c - 'a' + 10;
1790     else if (c >= 'A' && c <= 'F')
1791       unicode += c - 'A' + 10;
1792     else
1793       return addError(
1794           "Bad unicode escape sequence in string: hexadecimal digit expected.",
1795           token, current);
1796   }
1797   ret_unicode = static_cast<unsigned int>(unicode);
1798   return true;
1799 }
1800 
addError(const String & message,Token & token,Location extra)1801 bool OurReader::addError(const String& message, Token& token, Location extra) {
1802   ErrorInfo info;
1803   info.token_ = token;
1804   info.message_ = message;
1805   info.extra_ = extra;
1806   errors_.push_back(info);
1807   return false;
1808 }
1809 
recoverFromError(TokenType skipUntilToken)1810 bool OurReader::recoverFromError(TokenType skipUntilToken) {
1811   size_t errorCount = errors_.size();
1812   Token skip;
1813   for (;;) {
1814     if (!readToken(skip))
1815       errors_.resize(errorCount); // discard errors caused by recovery
1816     if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
1817       break;
1818   }
1819   errors_.resize(errorCount);
1820   return false;
1821 }
1822 
addErrorAndRecover(const String & message,Token & token,TokenType skipUntilToken)1823 bool OurReader::addErrorAndRecover(const String& message, Token& token,
1824                                    TokenType skipUntilToken) {
1825   addError(message, token);
1826   return recoverFromError(skipUntilToken);
1827 }
1828 
currentValue()1829 Value& OurReader::currentValue() { return *(nodes_.top()); }
1830 
getNextChar()1831 OurReader::Char OurReader::getNextChar() {
1832   if (current_ == end_)
1833     return 0;
1834   return *current_++;
1835 }
1836 
getLocationLineAndColumn(Location location,int & line,int & column) const1837 void OurReader::getLocationLineAndColumn(Location location, int& line,
1838                                          int& column) const {
1839   Location current = begin_;
1840   Location lastLineStart = current;
1841   line = 0;
1842   while (current < location && current != end_) {
1843     Char c = *current++;
1844     if (c == '\r') {
1845       // Add boundary check to avoid cross the border
1846       if (current == end_) {
1847         break;
1848       }
1849       if (*current == '\n')
1850         ++current;
1851       lastLineStart = current;
1852       ++line;
1853     } else if (c == '\n') {
1854       lastLineStart = current;
1855       ++line;
1856     }
1857   }
1858   // column & line start at 1
1859   column = int(location - lastLineStart) + 1;
1860   ++line;
1861 }
1862 
getLocationLineAndColumn(Location location) const1863 String OurReader::getLocationLineAndColumn(Location location) const {
1864   int line, column;
1865   getLocationLineAndColumn(location, line, column);
1866   char buffer[18 + 16 + 16 + 1];
1867   jsoncpp_snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
1868   return buffer;
1869 }
1870 
getFormattedErrorMessages() const1871 String OurReader::getFormattedErrorMessages() const {
1872   String formattedMessage;
1873   for (Errors::const_iterator itError = errors_.begin();
1874        itError != errors_.end(); ++itError) {
1875     const ErrorInfo& error = *itError;
1876     formattedMessage +=
1877         "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
1878     formattedMessage += "  " + error.message_ + "\n";
1879     if (error.extra_)
1880       formattedMessage +=
1881           "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
1882   }
1883   return formattedMessage;
1884 }
1885 
getStructuredErrors() const1886 std::vector<OurReader::StructuredError> OurReader::getStructuredErrors() const {
1887   std::vector<OurReader::StructuredError> allErrors;
1888   for (Errors::const_iterator itError = errors_.begin();
1889        itError != errors_.end(); ++itError) {
1890     const ErrorInfo& error = *itError;
1891     OurReader::StructuredError structured;
1892     structured.offset_start = error.token_.start_ - begin_;
1893     structured.offset_limit = error.token_.end_ - begin_;
1894     structured.message = error.message_;
1895     allErrors.push_back(structured);
1896   }
1897   return allErrors;
1898 }
1899 
1900 class OurCharReader : public CharReader {
1901   bool const collectComments_;
1902   OurReader reader_;
1903 
1904 public:
OurCharReader(bool collectComments,OurFeatures const & features)1905   OurCharReader(bool collectComments, OurFeatures const& features)
1906       : collectComments_(collectComments), reader_(features) {}
parse(char const * beginDoc,char const * endDoc,Value * root,String * errs)1907   bool parse(char const* beginDoc, char const* endDoc, Value* root,
1908              String* errs) JSONCPP_OVERRIDE {
1909     bool ok = reader_.parse(beginDoc, endDoc, *root, collectComments_);
1910     if (errs) {
1911       *errs = reader_.getFormattedErrorMessages();
1912     }
1913     return ok;
1914   }
1915 };
1916 
CharReaderBuilder()1917 CharReaderBuilder::CharReaderBuilder() { setDefaults(&settings_); }
~CharReaderBuilder()1918 CharReaderBuilder::~CharReaderBuilder() {}
newCharReader() const1919 CharReader* CharReaderBuilder::newCharReader() const {
1920   bool collectComments = settings_["collectComments"].asBool();
1921   OurFeatures features = OurFeatures::all();
1922   features.allowComments_ = settings_["allowComments"].asBool();
1923   features.allowTrailingCommas_ = settings_["allowTrailingCommas"].asBool();
1924   features.strictRoot_ = settings_["strictRoot"].asBool();
1925   features.allowDroppedNullPlaceholders_ =
1926       settings_["allowDroppedNullPlaceholders"].asBool();
1927   features.allowNumericKeys_ = settings_["allowNumericKeys"].asBool();
1928   features.allowSingleQuotes_ = settings_["allowSingleQuotes"].asBool();
1929 
1930   // Stack limit is always a size_t, so we get this as an unsigned int
1931   // regardless of it we have 64-bit integer support enabled.
1932   features.stackLimit_ = static_cast<size_t>(settings_["stackLimit"].asUInt());
1933   features.failIfExtra_ = settings_["failIfExtra"].asBool();
1934   features.rejectDupKeys_ = settings_["rejectDupKeys"].asBool();
1935   features.allowSpecialFloats_ = settings_["allowSpecialFloats"].asBool();
1936   features.skipBom_ = settings_["skipBom"].asBool();
1937   return new OurCharReader(collectComments, features);
1938 }
getValidReaderKeys(std::set<String> * valid_keys)1939 static void getValidReaderKeys(std::set<String>* valid_keys) {
1940   valid_keys->clear();
1941   valid_keys->insert("collectComments");
1942   valid_keys->insert("allowComments");
1943   valid_keys->insert("allowTrailingCommas");
1944   valid_keys->insert("strictRoot");
1945   valid_keys->insert("allowDroppedNullPlaceholders");
1946   valid_keys->insert("allowNumericKeys");
1947   valid_keys->insert("allowSingleQuotes");
1948   valid_keys->insert("stackLimit");
1949   valid_keys->insert("failIfExtra");
1950   valid_keys->insert("rejectDupKeys");
1951   valid_keys->insert("allowSpecialFloats");
1952   valid_keys->insert("skipBom");
1953 }
validate(Json::Value * invalid) const1954 bool CharReaderBuilder::validate(Json::Value* invalid) const {
1955   Json::Value my_invalid;
1956   if (!invalid)
1957     invalid = &my_invalid; // so we do not need to test for NULL
1958   Json::Value& inv = *invalid;
1959   std::set<String> valid_keys;
1960   getValidReaderKeys(&valid_keys);
1961   Value::Members keys = settings_.getMemberNames();
1962   size_t n = keys.size();
1963   for (size_t i = 0; i < n; ++i) {
1964     String const& key = keys[i];
1965     if (valid_keys.find(key) == valid_keys.end()) {
1966       inv[key] = settings_[key];
1967     }
1968   }
1969   return inv.empty();
1970 }
operator [](const String & key)1971 Value& CharReaderBuilder::operator[](const String& key) {
1972   return settings_[key];
1973 }
1974 // static
strictMode(Json::Value * settings)1975 void CharReaderBuilder::strictMode(Json::Value* settings) {
1976   //! [CharReaderBuilderStrictMode]
1977   (*settings)["allowComments"] = false;
1978   (*settings)["allowTrailingCommas"] = false;
1979   (*settings)["strictRoot"] = true;
1980   (*settings)["allowDroppedNullPlaceholders"] = false;
1981   (*settings)["allowNumericKeys"] = false;
1982   (*settings)["allowSingleQuotes"] = false;
1983   (*settings)["stackLimit"] = 1000;
1984   (*settings)["failIfExtra"] = true;
1985   (*settings)["rejectDupKeys"] = true;
1986   (*settings)["allowSpecialFloats"] = false;
1987   (*settings)["skipBom"] = true;
1988   //! [CharReaderBuilderStrictMode]
1989 }
1990 // static
setDefaults(Json::Value * settings)1991 void CharReaderBuilder::setDefaults(Json::Value* settings) {
1992   //! [CharReaderBuilderDefaults]
1993   (*settings)["collectComments"] = true;
1994   (*settings)["allowComments"] = true;
1995   (*settings)["allowTrailingCommas"] = true;
1996   (*settings)["strictRoot"] = false;
1997   (*settings)["allowDroppedNullPlaceholders"] = false;
1998   (*settings)["allowNumericKeys"] = false;
1999   (*settings)["allowSingleQuotes"] = false;
2000   (*settings)["stackLimit"] = 1000;
2001   (*settings)["failIfExtra"] = false;
2002   (*settings)["rejectDupKeys"] = false;
2003   (*settings)["allowSpecialFloats"] = false;
2004   (*settings)["skipBom"] = true;
2005   //! [CharReaderBuilderDefaults]
2006 }
2007 
2008 //////////////////////////////////
2009 // global functions
2010 
parseFromStream(CharReader::Factory const & fact,IStream & sin,Value * root,String * errs)2011 bool parseFromStream(CharReader::Factory const& fact, IStream& sin, Value* root,
2012                      String* errs) {
2013   OStringStream ssin;
2014   ssin << sin.rdbuf();
2015   String doc = ssin.str();
2016   char const* begin = doc.data();
2017   char const* end = begin + doc.size();
2018   // Note that we do not actually need a null-terminator.
2019   CharReaderPtr const reader(fact.newCharReader());
2020   bool ret = reader->parse(begin, end, root, errs);
2021   delete reader;
2022   return ret;
2023 }
2024 
operator >>(IStream & sin,Value & root)2025 IStream& operator>>(IStream& sin, Value& root) {
2026   CharReaderBuilder b;
2027   String errs;
2028   bool ok = parseFromStream(b, sin, &root, &errs);
2029   if (!ok) {
2030     throwRuntimeError(errs);
2031   }
2032   return sin;
2033 }
2034 
2035 } // namespace Json
2036