1 // Copyright 2007-2011 Baptiste Lepilleur and The JsonCpp Authors
2 // Copyright (C) 2016 InfoTeCS JSC. All rights reserved.
3 // Distributed under MIT license, or public domain if desired and
4 // recognized in your jurisdiction.
5 // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE
6
7 #if !defined(JSON_IS_AMALGAMATION)
8 #include "json_tool.h"
9 #include <json/assertions.h>
10 #include <json/reader.h>
11 #include <json/value.h>
12 #endif // if !defined(JSON_IS_AMALGAMATION)
#include <cassert>
#include <cstring>
#include <iostream>
#include <istream>
#include <iterator>
#include <limits>
#include <memory>
#include <set>
#include <sstream>
#include <utility>
22
23 #include <cstdio>
24 #if __cplusplus >= 201103L
25
26 #if !defined(sscanf)
27 #define sscanf std::sscanf
28 #endif
29
30 #endif //__cplusplus
31
32 #if defined(_MSC_VER)
33 #if !defined(_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES)
34 #define _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES 1
35 #endif //_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES
36 #endif //_MSC_VER
37
38 #if defined(_MSC_VER)
39 // Disable warning about strdup being deprecated.
40 #pragma warning(disable : 4996)
41 #endif
42
43 // Define JSONCPP_DEPRECATED_STACK_LIMIT as an appropriate integer at compile
44 // time to change the stack limit
45 #if !defined(JSONCPP_DEPRECATED_STACK_LIMIT)
46 #define JSONCPP_DEPRECATED_STACK_LIMIT 1000
47 #endif
48
// Maximum depth of the node stack accepted by the deprecated Reader:
// Reader::readValue() raises a runtime error once nodes_.size() exceeds it.
// The value comes from JSONCPP_DEPRECATED_STACK_LIMIT, which can be overridden
// at compile time.
static size_t const stackLimit_g =
    JSONCPP_DEPRECATED_STACK_LIMIT; // see readValue()
51
52 namespace Json {
53
// Alias for a plain (raw) pointer to CharReader.
// NOTE(review): the type itself does not express who owns the pointee —
// confirm at the use sites.
typedef CharReader* CharReaderPtr;
55
56 // Implementation of class Features
57 // ////////////////////////////////
58
Features()59 Features::Features()
60 : allowComments_(true), strictRoot_(false),
61 allowDroppedNullPlaceholders_(false), allowNumericKeys_(false) {}
all()62 Features Features::all() { return Features(); }
63
strictMode()64 Features Features::strictMode() {
65 Features features;
66 features.allowComments_ = false;
67 features.strictRoot_ = true;
68 features.allowDroppedNullPlaceholders_ = false;
69 features.allowNumericKeys_ = false;
70 return features;
71 }
72
73 // Implementation of class Reader
74 // ////////////////////////////////
75
containsNewLine(Reader::Location begin,Reader::Location end)76 bool Reader::containsNewLine(Reader::Location begin, Reader::Location end) {
77 for (; begin < end; ++begin)
78 if (*begin == '\n' || *begin == '\r')
79 return true;
80 return false;
81 }
82
83 // Class Reader
84 // //////////////////////////////////////////////////////////////////
85
Reader()86 Reader::Reader()
87 : errors_(), document_(), begin_(), end_(), current_(), lastValueEnd_(),
88 lastValue_(), commentsBefore_(), features_(Features::all()),
89 collectComments_() {}
90
Reader(const Features & features)91 Reader::Reader(const Features& features)
92 : errors_(), document_(), begin_(), end_(), current_(), lastValueEnd_(),
93 lastValue_(), commentsBefore_(), features_(features), collectComments_() {
94 }
95
parse(const std::string & document,Value & root,bool collectComments)96 bool Reader::parse(const std::string& document, Value& root,
97 bool collectComments) {
98 document_.assign(document.begin(), document.end());
99 const char* begin = document_.c_str();
100 const char* end = begin + document_.length();
101 return parse(begin, end, root, collectComments);
102 }
103
parse(std::istream & is,Value & root,bool collectComments)104 bool Reader::parse(std::istream& is, Value& root, bool collectComments) {
105 // std::istream_iterator<char> begin(is);
106 // std::istream_iterator<char> end;
107 // Those would allow streamed input from a file, if parse() were a
108 // template function.
109
110 // Since String is reference-counted, this at least does not
111 // create an extra copy.
112 String doc;
113 std::getline(is, doc, static_cast<char> EOF);
114 return parse(doc.data(), doc.data() + doc.size(), root, collectComments);
115 }
116
parse(const char * beginDoc,const char * endDoc,Value & root,bool collectComments)117 bool Reader::parse(const char* beginDoc, const char* endDoc, Value& root,
118 bool collectComments) {
119 if (!features_.allowComments_) {
120 collectComments = false;
121 }
122
123 begin_ = beginDoc;
124 end_ = endDoc;
125 collectComments_ = collectComments;
126 current_ = begin_;
127 lastValueEnd_ = JSONCPP_NULL;
128 lastValue_ = JSONCPP_NULL;
129 commentsBefore_.clear();
130 errors_.clear();
131 while (!nodes_.empty())
132 nodes_.pop();
133 nodes_.push(&root);
134
135 bool successful = readValue();
136 Token token;
137 skipCommentTokens(token);
138 if (collectComments_ && !commentsBefore_.empty())
139 root.setComment(commentsBefore_, commentAfter);
140 if (features_.strictRoot_) {
141 if (!root.isArray() && !root.isObject()) {
142 // Set error location to start of doc, ideally should be first token found
143 // in doc
144 token.type_ = tokenError;
145 token.start_ = beginDoc;
146 token.end_ = endDoc;
147 addError(
148 "A valid JSON document must be either an array or an object value.",
149 token);
150 return false;
151 }
152 }
153 return successful;
154 }
155
readValue()156 bool Reader::readValue() {
157 // readValue() may call itself only if it calls readObject() or ReadArray().
158 // These methods execute nodes_.push() just before and nodes_.pop)() just
159 // after calling readValue(). parse() executes one nodes_.push(), so > instead
160 // of >=.
161 if (nodes_.size() > stackLimit_g)
162 throwRuntimeError("Exceeded stackLimit in readValue().");
163
164 Token token;
165 skipCommentTokens(token);
166 bool successful = true;
167
168 if (collectComments_ && !commentsBefore_.empty()) {
169 currentValue().setComment(commentsBefore_, commentBefore);
170 commentsBefore_.clear();
171 }
172
173 switch (token.type_) {
174 case tokenObjectBegin:
175 successful = readObject(token);
176 currentValue().setOffsetLimit(current_ - begin_);
177 break;
178 case tokenArrayBegin:
179 successful = readArray(token);
180 currentValue().setOffsetLimit(current_ - begin_);
181 break;
182 case tokenNumber:
183 successful = decodeNumber(token);
184 break;
185 case tokenString:
186 successful = decodeString(token);
187 break;
188 case tokenTrue: {
189 Value v(true);
190 currentValue().swapPayload(v);
191 currentValue().setOffsetStart(token.start_ - begin_);
192 currentValue().setOffsetLimit(token.end_ - begin_);
193 } break;
194 case tokenFalse: {
195 Value v(false);
196 currentValue().swapPayload(v);
197 currentValue().setOffsetStart(token.start_ - begin_);
198 currentValue().setOffsetLimit(token.end_ - begin_);
199 } break;
200 case tokenNull: {
201 Value v;
202 currentValue().swapPayload(v);
203 currentValue().setOffsetStart(token.start_ - begin_);
204 currentValue().setOffsetLimit(token.end_ - begin_);
205 } break;
206 case tokenArraySeparator:
207 case tokenObjectEnd:
208 case tokenArrayEnd:
209 if (features_.allowDroppedNullPlaceholders_) {
210 // "Un-read" the current token and mark the current value as a null
211 // token.
212 current_--;
213 Value v;
214 currentValue().swapPayload(v);
215 currentValue().setOffsetStart(current_ - begin_ - 1);
216 currentValue().setOffsetLimit(current_ - begin_);
217 break;
218 } // Else, fall through...
219 default:
220 currentValue().setOffsetStart(token.start_ - begin_);
221 currentValue().setOffsetLimit(token.end_ - begin_);
222 return addError("Syntax error: value, object or array expected.", token);
223 }
224
225 if (collectComments_) {
226 lastValueEnd_ = current_;
227 lastValue_ = ¤tValue();
228 }
229
230 return successful;
231 }
232
skipCommentTokens(Token & token)233 void Reader::skipCommentTokens(Token& token) {
234 if (features_.allowComments_) {
235 do {
236 readToken(token);
237 } while (token.type_ == tokenComment);
238 } else {
239 readToken(token);
240 }
241 }
242
readToken(Token & token)243 bool Reader::readToken(Token& token) {
244 skipSpaces();
245 token.start_ = current_;
246 Char c = getNextChar();
247 bool ok = true;
248 switch (c) {
249 case '{':
250 token.type_ = tokenObjectBegin;
251 break;
252 case '}':
253 token.type_ = tokenObjectEnd;
254 break;
255 case '[':
256 token.type_ = tokenArrayBegin;
257 break;
258 case ']':
259 token.type_ = tokenArrayEnd;
260 break;
261 case '"':
262 token.type_ = tokenString;
263 ok = readString();
264 break;
265 case '/':
266 token.type_ = tokenComment;
267 ok = readComment();
268 break;
269 case '0':
270 case '1':
271 case '2':
272 case '3':
273 case '4':
274 case '5':
275 case '6':
276 case '7':
277 case '8':
278 case '9':
279 case '-':
280 token.type_ = tokenNumber;
281 readNumber();
282 break;
283 case 't':
284 token.type_ = tokenTrue;
285 ok = match("rue", 3);
286 break;
287 case 'f':
288 token.type_ = tokenFalse;
289 ok = match("alse", 4);
290 break;
291 case 'n':
292 token.type_ = tokenNull;
293 ok = match("ull", 3);
294 break;
295 case ',':
296 token.type_ = tokenArraySeparator;
297 break;
298 case ':':
299 token.type_ = tokenMemberSeparator;
300 break;
301 case 0:
302 token.type_ = tokenEndOfStream;
303 break;
304 default:
305 ok = false;
306 break;
307 }
308 if (!ok)
309 token.type_ = tokenError;
310 token.end_ = current_;
311 return ok;
312 }
313
skipSpaces()314 void Reader::skipSpaces() {
315 while (current_ != end_) {
316 Char c = *current_;
317 if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
318 ++current_;
319 else
320 break;
321 }
322 }
323
match(const Char * pattern,int patternLength)324 bool Reader::match(const Char* pattern, int patternLength) {
325 if (end_ - current_ < patternLength)
326 return false;
327 int index = patternLength;
328 while (index--)
329 if (current_[index] != pattern[index])
330 return false;
331 current_ += patternLength;
332 return true;
333 }
334
readComment()335 bool Reader::readComment() {
336 Location commentBegin = current_ - 1;
337 Char c = getNextChar();
338 bool successful = false;
339 if (c == '*')
340 successful = readCStyleComment();
341 else if (c == '/')
342 successful = readCppStyleComment();
343 if (!successful)
344 return false;
345
346 if (collectComments_) {
347 CommentPlacement placement = commentBefore;
348 if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
349 if (c != '*' || !containsNewLine(commentBegin, current_))
350 placement = commentAfterOnSameLine;
351 }
352
353 addComment(commentBegin, current_, placement);
354 }
355 return true;
356 }
357
normalizeEOL(Reader::Location begin,Reader::Location end)358 String Reader::normalizeEOL(Reader::Location begin, Reader::Location end) {
359 String normalized;
360 normalized.reserve(static_cast<size_t>(end - begin));
361 Reader::Location current = begin;
362 while (current != end) {
363 char c = *current++;
364 if (c == '\r') {
365 if (current != end && *current == '\n')
366 // convert dos EOL
367 ++current;
368 // convert Mac EOL
369 normalized += '\n';
370 } else {
371 normalized += c;
372 }
373 }
374 return normalized;
375 }
376
addComment(Location begin,Location end,CommentPlacement placement)377 void Reader::addComment(Location begin, Location end,
378 CommentPlacement placement) {
379 assert(collectComments_);
380 const String& normalized = normalizeEOL(begin, end);
381 if (placement == commentAfterOnSameLine) {
382 assert(lastValue_ != JSONCPP_NULL);
383 lastValue_->setComment(normalized, placement);
384 } else {
385 commentsBefore_ += normalized;
386 }
387 }
388
readCStyleComment()389 bool Reader::readCStyleComment() {
390 while ((current_ + 1) < end_) {
391 Char c = getNextChar();
392 if (c == '*' && *current_ == '/')
393 break;
394 }
395 return getNextChar() == '/';
396 }
397
readCppStyleComment()398 bool Reader::readCppStyleComment() {
399 while (current_ != end_) {
400 Char c = getNextChar();
401 if (c == '\n')
402 break;
403 if (c == '\r') {
404 // Consume DOS EOL. It will be normalized in addComment.
405 if (current_ != end_ && *current_ == '\n')
406 getNextChar();
407 // Break on Moc OS 9 EOL.
408 break;
409 }
410 }
411 return true;
412 }
413
readNumber()414 void Reader::readNumber() {
415 Location p = current_;
416 char c = '0'; // stopgap for already consumed character
417 // integral part
418 while (c >= '0' && c <= '9')
419 c = (current_ = p) < end_ ? *p++ : '\0';
420 // fractional part
421 if (c == '.') {
422 c = (current_ = p) < end_ ? *p++ : '\0';
423 while (c >= '0' && c <= '9')
424 c = (current_ = p) < end_ ? *p++ : '\0';
425 }
426 // exponential part
427 if (c == 'e' || c == 'E') {
428 c = (current_ = p) < end_ ? *p++ : '\0';
429 if (c == '+' || c == '-')
430 c = (current_ = p) < end_ ? *p++ : '\0';
431 while (c >= '0' && c <= '9')
432 c = (current_ = p) < end_ ? *p++ : '\0';
433 }
434 }
435
readString()436 bool Reader::readString() {
437 Char c = '\0';
438 while (current_ != end_) {
439 c = getNextChar();
440 if (c == '\\')
441 getNextChar();
442 else if (c == '"')
443 break;
444 }
445 return c == '"';
446 }
447
// Parses the members of a JSON object into currentValue().
//
// `token` is the token that opened the object; its start position is recorded
// as the value's start offset. Returns true once the closing '}' has been
// read. On any error the function returns false, after skipping tokens up to
// the next '}' or the end of the stream (see recoverFromError()).
bool Reader::readObject(Token& token) {
  Token tokenName;
  String name;
  // Replace whatever the current value held with an empty object.
  Value init(objectValue);
  currentValue().swapPayload(init);
  currentValue().setOffsetStart(token.start_ - begin_);
  while (readToken(tokenName)) {
    // Skip comment tokens that precede the member name.
    bool initialTokenOk = true;
    while (tokenName.type_ == tokenComment && initialTokenOk)
      initialTokenOk = readToken(tokenName);
    if (!initialTokenOk)
      break;
    // `name` is still empty only before the first member has been read.
    if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object
      return true;
    name.clear();
    if (tokenName.type_ == tokenString) {
      if (!decodeString(tokenName, name))
        return recoverFromError(tokenObjectEnd);
    } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
      // A numeric key is decoded as a number and then used in its string form.
      Value numberName;
      if (!decodeNumber(tokenName, numberName))
        return recoverFromError(tokenObjectEnd);
      name = numberName.asString();
    } else {
      // Anything else is not a member name; reported after the loop.
      break;
    }

    Token colon;
    if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
      return addErrorAndRecover("Missing ':' after object member name", colon,
                                tokenObjectEnd);
    }
    // Make the member the current value while it is being parsed.
    Value& value = currentValue()[name];
    nodes_.push(&value);
    bool ok = readValue();
    nodes_.pop();
    if (!ok) // error already set
      return recoverFromError(tokenObjectEnd);

    Token comma;
    if (!readToken(comma) ||
        (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
         comma.type_ != tokenComment)) {
      return addErrorAndRecover("Missing ',' or '}' in object declaration",
                                comma, tokenObjectEnd);
    }
    // Skip comment tokens that follow the member value.
    bool finalizeTokenOk = true;
    while (comma.type_ == tokenComment && finalizeTokenOk)
      finalizeTokenOk = readToken(comma);
    if (comma.type_ == tokenObjectEnd)
      return true;
  }
  return addErrorAndRecover("Missing '}' or object member name", tokenName,
                            tokenObjectEnd);
}
503
readArray(Token & token)504 bool Reader::readArray(Token& token) {
505 Value init(arrayValue);
506 currentValue().swapPayload(init);
507 currentValue().setOffsetStart(token.start_ - begin_);
508 skipSpaces();
509 if (current_ != end_ && *current_ == ']') // empty array
510 {
511 Token endArray;
512 readToken(endArray);
513 return true;
514 }
515 int index = 0;
516 for (;;) {
517 Value& value = currentValue()[index++];
518 nodes_.push(&value);
519 bool ok = readValue();
520 nodes_.pop();
521 if (!ok) // error already set
522 return recoverFromError(tokenArrayEnd);
523
524 Token currentToken;
525 // Accept Comment after last item in the array.
526 ok = readToken(currentToken);
527 while (currentToken.type_ == tokenComment && ok) {
528 ok = readToken(currentToken);
529 }
530 bool badTokenType = (currentToken.type_ != tokenArraySeparator &&
531 currentToken.type_ != tokenArrayEnd);
532 if (!ok || badTokenType) {
533 return addErrorAndRecover("Missing ',' or ']' in array declaration",
534 currentToken, tokenArrayEnd);
535 }
536 if (currentToken.type_ == tokenArrayEnd)
537 break;
538 }
539 return true;
540 }
541
decodeNumber(Token & token)542 bool Reader::decodeNumber(Token& token) {
543 Value decoded;
544 if (!decodeNumber(token, decoded))
545 return false;
546 currentValue().swapPayload(decoded);
547 currentValue().setOffsetStart(token.start_ - begin_);
548 currentValue().setOffsetLimit(token.end_ - begin_);
549 return true;
550 }
551
// Decodes the number token into `decoded`.
//
// The token is first read as an integer, digit by digit. As soon as a
// character that is not a digit is met, or the next digit would push the
// value past the largest supported integer, decoding is delegated to
// decodeDouble(). Returns true on success; a false result comes from
// decodeDouble().
bool Reader::decodeNumber(Token& token, Value& decoded) {
  // Attempts to parse the number as an integer. If the number is
  // larger than the maximum supported value of an integer then
  // we decode the number as a double.
  Location current = token.start_;
  bool isNegative = *current == '-';
  if (isNegative)
    ++current;
  // TODO: Help the compiler do the div and mod at compile time or get rid of
  // them.
  // Largest magnitude that still fits: |minLargestInt| for negative numbers,
  // maxLargestUInt otherwise.
  Value::LargestUInt maxIntegerValue =
      isNegative ? Value::LargestUInt(Value::maxLargestInt) + 1
                 : Value::maxLargestUInt;
  Value::LargestUInt threshold = maxIntegerValue / 10;
  Value::LargestUInt value = 0;
  while (current < token.end_) {
    Char c = *current++;
    if (c < '0' || c > '9')
      return decodeDouble(token, decoded);
    Value::UInt digit(static_cast<Value::UInt>(c - '0'));
    if (value >= threshold) {
      // We've hit or exceeded the max value divided by 10 (rounded down). If
      // a) we've only just touched the limit, b) this is the last digit, and
      // c) it's small enough to fit in that rounding delta, we're okay.
      // Otherwise treat this number as a double to avoid overflow.
      if (value > threshold || current != token.end_ ||
          digit > maxIntegerValue % 10) {
        return decodeDouble(token, decoded);
      }
    }
    value = value * 10 + digit;
  }
  // Pick the result representation: the most negative integer is assigned
  // directly (its magnitude cannot be negated as a LargestInt), other
  // negatives are negated, values up to maxInt are stored as signed and the
  // rest as unsigned.
  if (isNegative && value == maxIntegerValue)
    decoded = Value::minLargestInt;
  else if (isNegative)
    decoded = -Value::LargestInt(value);
  else if (value <= Value::LargestUInt(Value::maxInt))
    decoded = Value::LargestInt(value);
  else
    decoded = value;
  return true;
}
594
decodeDouble(Token & token)595 bool Reader::decodeDouble(Token& token) {
596 Value decoded;
597 if (!decodeDouble(token, decoded))
598 return false;
599 currentValue().swapPayload(decoded);
600 currentValue().setOffsetStart(token.start_ - begin_);
601 currentValue().setOffsetLimit(token.end_ - begin_);
602 return true;
603 }
604
decodeDouble(Token & token,Value & decoded)605 bool Reader::decodeDouble(Token& token, Value& decoded) {
606 double value = 0;
607 String buffer(token.start_, token.end_);
608 IStringStream is(buffer);
609 if (!(is >> value))
610 return addError(
611 "'" + String(token.start_, token.end_) + "' is not a number.", token);
612 decoded = value;
613 return true;
614 }
615
decodeString(Token & token)616 bool Reader::decodeString(Token& token) {
617 String decoded_string;
618 if (!decodeString(token, decoded_string))
619 return false;
620 Value decoded(decoded_string);
621 currentValue().swapPayload(decoded);
622 currentValue().setOffsetStart(token.start_ - begin_);
623 currentValue().setOffsetLimit(token.end_ - begin_);
624 return true;
625 }
626
decodeString(Token & token,String & decoded)627 bool Reader::decodeString(Token& token, String& decoded) {
628 decoded.reserve(static_cast<size_t>(token.end_ - token.start_ - 2));
629 Location current = token.start_ + 1; // skip '"'
630 Location end = token.end_ - 1; // do not include '"'
631 while (current != end) {
632 Char c = *current++;
633 if (c == '"')
634 break;
635 if (c == '\\') {
636 if (current == end)
637 return addError("Empty escape sequence in string", token, current);
638 Char escape = *current++;
639 switch (escape) {
640 case '"':
641 decoded += '"';
642 break;
643 case '/':
644 decoded += '/';
645 break;
646 case '\\':
647 decoded += '\\';
648 break;
649 case 'b':
650 decoded += '\b';
651 break;
652 case 'f':
653 decoded += '\f';
654 break;
655 case 'n':
656 decoded += '\n';
657 break;
658 case 'r':
659 decoded += '\r';
660 break;
661 case 't':
662 decoded += '\t';
663 break;
664 case 'u': {
665 unsigned int unicode;
666 if (!decodeUnicodeCodePoint(token, current, end, unicode))
667 return false;
668 decoded += codePointToUTF8(unicode);
669 } break;
670 default:
671 return addError("Bad escape sequence in string", token, current);
672 }
673 } else {
674 decoded += c;
675 }
676 }
677 return true;
678 }
679
decodeUnicodeCodePoint(Token & token,Location & current,Location end,unsigned int & unicode)680 bool Reader::decodeUnicodeCodePoint(Token& token, Location& current,
681 Location end, unsigned int& unicode) {
682
683 if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
684 return false;
685 if (unicode >= 0xD800 && unicode <= 0xDBFF) {
686 // surrogate pairs
687 if (end - current < 6)
688 return addError(
689 "additional six characters expected to parse unicode surrogate pair.",
690 token, current);
691 if (*(current++) == '\\' && *(current++) == 'u') {
692 unsigned int surrogatePair;
693 if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
694 unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
695 } else
696 return false;
697 } else
698 return addError("expecting another \\u token to begin the second half of "
699 "a unicode surrogate pair",
700 token, current);
701 }
702 return true;
703 }
704
decodeUnicodeEscapeSequence(Token & token,Location & current,Location end,unsigned int & ret_unicode)705 bool Reader::decodeUnicodeEscapeSequence(Token& token, Location& current,
706 Location end,
707 unsigned int& ret_unicode) {
708 if (end - current < 4)
709 return addError(
710 "Bad unicode escape sequence in string: four digits expected.", token,
711 current);
712 int unicode = 0;
713 for (int index = 0; index < 4; ++index) {
714 Char c = *current++;
715 unicode *= 16;
716 if (c >= '0' && c <= '9')
717 unicode += c - '0';
718 else if (c >= 'a' && c <= 'f')
719 unicode += c - 'a' + 10;
720 else if (c >= 'A' && c <= 'F')
721 unicode += c - 'A' + 10;
722 else
723 return addError(
724 "Bad unicode escape sequence in string: hexadecimal digit expected.",
725 token, current);
726 }
727 ret_unicode = static_cast<unsigned int>(unicode);
728 return true;
729 }
730
addError(const String & message,Token & token,Location extra)731 bool Reader::addError(const String& message, Token& token, Location extra) {
732 ErrorInfo info;
733 info.token_ = token;
734 info.message_ = message;
735 info.extra_ = extra;
736 errors_.push_back(info);
737 return false;
738 }
739
recoverFromError(TokenType skipUntilToken)740 bool Reader::recoverFromError(TokenType skipUntilToken) {
741 size_t const errorCount = errors_.size();
742 Token skip;
743 for (;;) {
744 if (!readToken(skip))
745 errors_.resize(errorCount); // discard errors caused by recovery
746 if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
747 break;
748 }
749 errors_.resize(errorCount);
750 return false;
751 }
752
addErrorAndRecover(const String & message,Token & token,TokenType skipUntilToken)753 bool Reader::addErrorAndRecover(const String& message, Token& token,
754 TokenType skipUntilToken) {
755 addError(message, token);
756 return recoverFromError(skipUntilToken);
757 }
758
currentValue()759 Value& Reader::currentValue() { return *(nodes_.top()); }
760
getNextChar()761 Reader::Char Reader::getNextChar() {
762 if (current_ == end_)
763 return 0;
764 return *current_++;
765 }
766
getLocationLineAndColumn(Location location,int & line,int & column) const767 void Reader::getLocationLineAndColumn(Location location, int& line,
768 int& column) const {
769 Location current = begin_;
770 Location lastLineStart = current;
771 line = 0;
772 while (current < location && current != end_) {
773 Char c = *current++;
774 if (c == '\r') {
775 if (*current == '\n')
776 ++current;
777 lastLineStart = current;
778 ++line;
779 } else if (c == '\n') {
780 lastLineStart = current;
781 ++line;
782 }
783 }
784 // column & line start at 1
785 column = int(location - lastLineStart) + 1;
786 ++line;
787 }
788
getLocationLineAndColumn(Location location) const789 String Reader::getLocationLineAndColumn(Location location) const {
790 int line, column;
791 getLocationLineAndColumn(location, line, column);
792 char buffer[18 + 16 + 16 + 1];
793 jsoncpp_snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
794 return buffer;
795 }
796
797 // Deprecated. Preserved for backward compatibility
getFormatedErrorMessages() const798 String Reader::getFormatedErrorMessages() const {
799 return getFormattedErrorMessages();
800 }
801
getFormattedErrorMessages() const802 String Reader::getFormattedErrorMessages() const {
803 String formattedMessage;
804 for (Errors::const_iterator itError = errors_.begin();
805 itError != errors_.end(); ++itError) {
806 const ErrorInfo& error = *itError;
807 formattedMessage +=
808 "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
809 formattedMessage += " " + error.message_ + "\n";
810 if (error.extra_)
811 formattedMessage +=
812 "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
813 }
814 return formattedMessage;
815 }
816
getStructuredErrors() const817 std::vector<Reader::StructuredError> Reader::getStructuredErrors() const {
818 std::vector<Reader::StructuredError> allErrors;
819 for (Errors::const_iterator itError = errors_.begin();
820 itError != errors_.end(); ++itError) {
821 const ErrorInfo& error = *itError;
822 Reader::StructuredError structured;
823 structured.offset_start = error.token_.start_ - begin_;
824 structured.offset_limit = error.token_.end_ - begin_;
825 structured.message = error.message_;
826 allErrors.push_back(structured);
827 }
828 return allErrors;
829 }
830
pushError(const Value & value,const String & message)831 bool Reader::pushError(const Value& value, const String& message) {
832 ptrdiff_t const length = end_ - begin_;
833 if (value.getOffsetStart() > length || value.getOffsetLimit() > length)
834 return false;
835 Token token;
836 token.type_ = tokenError;
837 token.start_ = begin_ + value.getOffsetStart();
838 token.end_ = begin_ + value.getOffsetLimit();
839 ErrorInfo info;
840 info.token_ = token;
841 info.message_ = message;
842 info.extra_ = JSONCPP_NULL;
843 errors_.push_back(info);
844 return true;
845 }
846
pushError(const Value & value,const String & message,const Value & extra)847 bool Reader::pushError(const Value& value, const String& message,
848 const Value& extra) {
849 ptrdiff_t const length = end_ - begin_;
850 if (value.getOffsetStart() > length || value.getOffsetLimit() > length ||
851 extra.getOffsetLimit() > length)
852 return false;
853 Token token;
854 token.type_ = tokenError;
855 token.start_ = begin_ + value.getOffsetStart();
856 token.end_ = begin_ + value.getOffsetLimit();
857 ErrorInfo info;
858 info.token_ = token;
859 info.message_ = message;
860 info.extra_ = begin_ + extra.getOffsetStart();
861 errors_.push_back(info);
862 return true;
863 }
864
good() const865 bool Reader::good() const { return errors_.empty(); }
866
867 // Originally copied from the Features class (now deprecated), used internally
868 // for features implementation.
// Bundle of parser options consumed by OurReader.
// The class declares no constructor; OurFeatures::all() returns a
// value-initialized instance. Only the options whose use is visible in this
// part of the file are described below.
class OurFeatures {
public:
  static OurFeatures all();
  // When false, OurReader::parse() also turns comment collection off.
  bool allowComments_;
  bool allowTrailingCommas_;
  // When true, OurReader::parse() rejects a root value that is neither an
  // array nor an object.
  bool strictRoot_;
  bool allowDroppedNullPlaceholders_;
  bool allowNumericKeys_;
  bool allowSingleQuotes_;
  // When true, OurReader::parse() fails if anything other than the end of
  // the stream follows the root value.
  bool failIfExtra_;
  bool rejectDupKeys_;
  bool allowSpecialFloats_;
  // Passed to OurReader::skipBom() at the start of parse().
  bool skipBom_;
  // OurReader::readValue() raises a runtime error once the node stack is
  // deeper than this.
  size_t stackLimit_;
}; // OurFeatures
884
all()885 OurFeatures OurFeatures::all() { return OurFeatures(); }
886
// Declaration and implementation of class OurReader
// //////////////////////////////////////////////////
889
890 // Originally copied from the Reader class (now deprecated), used internally
891 // for implementing JSON reading.
// Internal JSON parser configured by an OurFeatures instance.
//
// The public surface is parse() plus two accessors for the errors recorded
// during the last parse. Instances cannot be copied: the copy constructor and
// the assignment operator are declared private and left unimplemented.
class OurReader {
public:
  typedef char Char;
  typedef const Char* Location;
  // One reported error: the offsets of the offending token and the message.
  struct StructuredError {
    ptrdiff_t offset_start;
    ptrdiff_t offset_limit;
    String message;
  };

  JSONCPP_OP_EXPLICIT OurReader(OurFeatures const& features);
  // Parses the document in [beginDoc, endDoc) into `root`.
  bool parse(const char* beginDoc, const char* endDoc, Value& root,
             bool collectComments = true);
  String getFormattedErrorMessages() const;
  std::vector<StructuredError> getStructuredErrors() const;

private:
  OurReader(OurReader const&);      // no impl
  void operator=(OurReader const&); // no impl

  // Kinds of token produced by readToken().
  enum TokenType {
    tokenEndOfStream = 0,
    tokenObjectBegin,
    tokenObjectEnd,
    tokenArrayBegin,
    tokenArrayEnd,
    tokenString,
    tokenNumber,
    tokenTrue,
    tokenFalse,
    tokenNull,
    tokenNaN,
    tokenPosInf,
    tokenNegInf,
    tokenArraySeparator,
    tokenMemberSeparator,
    tokenComment,
    tokenError
  };

  // A token: its kind and the [start_, end_) range it covers in the input.
  class Token {
  public:
    TokenType type_;
    Location start_;
    Location end_;
  };

  // A recorded error: the token it refers to, the message, and an optional
  // extra location (JSONCPP_NULL by default, see addError()).
  class ErrorInfo {
  public:
    Token token_;
    String message_;
    Location extra_;
  };

  typedef std::deque<ErrorInfo> Errors;

  // Tokenizer helpers.
  bool readToken(Token& token);
  void skipSpaces();
  void skipBom(bool skipBom);
  bool match(const Char* pattern, int patternLength);
  bool readComment();
  bool readCStyleComment(bool* containsNewLineResult);
  bool readCppStyleComment();
  bool readString();
  bool readStringSingleQuote();
  bool readNumber(bool checkInf);
  // Value parsing and decoding.
  bool readValue();
  bool readObject(Token& token);
  bool readArray(Token& token);
  bool decodeNumber(Token& token);
  bool decodeNumber(Token& token, Value& decoded);
  bool decodeString(Token& token);
  bool decodeString(Token& token, String& decoded);
  bool decodeDouble(Token& token);
  bool decodeDouble(Token& token, Value& decoded);
  bool decodeUnicodeCodePoint(Token& token, Location& current, Location end,
                              unsigned int& unicode);
  bool decodeUnicodeEscapeSequence(Token& token, Location& current,
                                   Location end, unsigned int& unicode);
  // Error reporting and recovery.
  bool addError(const String& message, Token& token,
                Location extra = JSONCPP_NULL);
  bool recoverFromError(TokenType skipUntilToken);
  bool addErrorAndRecover(const String& message, Token& token,
                          TokenType skipUntilToken);
  void skipUntilSpace();
  Value& currentValue();
  Char getNextChar();
  void getLocationLineAndColumn(Location location, int& line,
                                int& column) const;
  String getLocationLineAndColumn(Location location) const;
  void addComment(Location begin, Location end, CommentPlacement placement);
  void skipCommentTokens(Token& token);

  static String normalizeEOL(Location begin, Location end);
  static bool containsNewLine(Location begin, Location end);

  typedef std::stack<Value*> Nodes;

  // Stack of values being filled in; parse() pushes the root onto it.
  Nodes nodes_;
  Errors errors_;
  String document_;
  // begin_/end_ delimit the document; current_ is the read position.
  Location begin_;
  Location end_;
  Location current_;
  Location lastValueEnd_;
  Value* lastValue_;
  bool lastValueHasAComment_;
  // Comment text collected so far and not yet attached to a value.
  String commentsBefore_;

  OurFeatures const features_;
  bool collectComments_;
}; // OurReader
1004
// Complete copy of the Reader implementation, adapted for OurReader.
1006
containsNewLine(OurReader::Location begin,OurReader::Location end)1007 bool OurReader::containsNewLine(OurReader::Location begin,
1008 OurReader::Location end) {
1009 for (; begin < end; ++begin)
1010 if (*begin == '\n' || *begin == '\r')
1011 return true;
1012 return false;
1013 }
1014
OurReader(OurFeatures const & features)1015 OurReader::OurReader(OurFeatures const& features)
1016 : errors_(), document_(), begin_(JSONCPP_NULL), end_(JSONCPP_NULL),
1017 current_(JSONCPP_NULL), lastValueEnd_(JSONCPP_NULL),
1018 lastValue_(JSONCPP_NULL), lastValueHasAComment_(false), commentsBefore_(),
1019 features_(features), collectComments_(false) {}
1020
parse(const char * beginDoc,const char * endDoc,Value & root,bool collectComments)1021 bool OurReader::parse(const char* beginDoc, const char* endDoc, Value& root,
1022 bool collectComments) {
1023 if (!features_.allowComments_) {
1024 collectComments = false;
1025 }
1026
1027 begin_ = beginDoc;
1028 end_ = endDoc;
1029 collectComments_ = collectComments;
1030 current_ = begin_;
1031 lastValueEnd_ = JSONCPP_NULL;
1032 lastValue_ = JSONCPP_NULL;
1033 commentsBefore_.clear();
1034 errors_.clear();
1035 while (!nodes_.empty())
1036 nodes_.pop();
1037 nodes_.push(&root);
1038
1039 // skip byte order mark if it exists at the beginning of the UTF-8 text.
1040 skipBom(features_.skipBom_);
1041 bool successful = readValue();
1042 nodes_.pop();
1043 Token token;
1044 skipCommentTokens(token);
1045 if (features_.failIfExtra_ && (token.type_ != tokenEndOfStream)) {
1046 addError("Extra non-whitespace after JSON value.", token);
1047 return false;
1048 }
1049 if (collectComments_ && !commentsBefore_.empty())
1050 root.setComment(commentsBefore_, commentAfter);
1051 if (features_.strictRoot_) {
1052 if (!root.isArray() && !root.isObject()) {
1053 // Set error location to start of doc, ideally should be first token found
1054 // in doc
1055 token.type_ = tokenError;
1056 token.start_ = beginDoc;
1057 token.end_ = endDoc;
1058 addError(
1059 "A valid JSON document must be either an array or an object value.",
1060 token);
1061 return false;
1062 }
1063 }
1064 return successful;
1065 }
1066
readValue()1067 bool OurReader::readValue() {
1068 // To preserve the old behaviour we cast size_t to int.
1069 if (nodes_.size() > features_.stackLimit_)
1070 throwRuntimeError("Exceeded stackLimit in readValue().");
1071 Token token;
1072 skipCommentTokens(token);
1073 bool successful = true;
1074
1075 if (collectComments_ && !commentsBefore_.empty()) {
1076 currentValue().setComment(commentsBefore_, commentBefore);
1077 commentsBefore_.clear();
1078 }
1079
1080 switch (token.type_) {
1081 case tokenObjectBegin:
1082 successful = readObject(token);
1083 currentValue().setOffsetLimit(current_ - begin_);
1084 break;
1085 case tokenArrayBegin:
1086 successful = readArray(token);
1087 currentValue().setOffsetLimit(current_ - begin_);
1088 break;
1089 case tokenNumber:
1090 successful = decodeNumber(token);
1091 break;
1092 case tokenString:
1093 successful = decodeString(token);
1094 break;
1095 case tokenTrue: {
1096 Value v(true);
1097 currentValue().swapPayload(v);
1098 currentValue().setOffsetStart(token.start_ - begin_);
1099 currentValue().setOffsetLimit(token.end_ - begin_);
1100 } break;
1101 case tokenFalse: {
1102 Value v(false);
1103 currentValue().swapPayload(v);
1104 currentValue().setOffsetStart(token.start_ - begin_);
1105 currentValue().setOffsetLimit(token.end_ - begin_);
1106 } break;
1107 case tokenNull: {
1108 Value v;
1109 currentValue().swapPayload(v);
1110 currentValue().setOffsetStart(token.start_ - begin_);
1111 currentValue().setOffsetLimit(token.end_ - begin_);
1112 } break;
1113 case tokenNaN: {
1114 Value v(std::numeric_limits<double>::quiet_NaN());
1115 currentValue().swapPayload(v);
1116 currentValue().setOffsetStart(token.start_ - begin_);
1117 currentValue().setOffsetLimit(token.end_ - begin_);
1118 } break;
1119 case tokenPosInf: {
1120 Value v(std::numeric_limits<double>::infinity());
1121 currentValue().swapPayload(v);
1122 currentValue().setOffsetStart(token.start_ - begin_);
1123 currentValue().setOffsetLimit(token.end_ - begin_);
1124 } break;
1125 case tokenNegInf: {
1126 Value v(-std::numeric_limits<double>::infinity());
1127 currentValue().swapPayload(v);
1128 currentValue().setOffsetStart(token.start_ - begin_);
1129 currentValue().setOffsetLimit(token.end_ - begin_);
1130 } break;
1131 case tokenArraySeparator:
1132 case tokenObjectEnd:
1133 case tokenArrayEnd:
1134 if (features_.allowDroppedNullPlaceholders_) {
1135 // "Un-read" the current token and mark the current value as a null
1136 // token.
1137 current_--;
1138 Value v;
1139 currentValue().swapPayload(v);
1140 currentValue().setOffsetStart(current_ - begin_ - 1);
1141 currentValue().setOffsetLimit(current_ - begin_);
1142 break;
1143 } // else, fall through ...
1144 default:
1145 currentValue().setOffsetStart(token.start_ - begin_);
1146 currentValue().setOffsetLimit(token.end_ - begin_);
1147 return addError("Syntax error: value, object or array expected.", token);
1148 }
1149
1150 if (collectComments_) {
1151 lastValueEnd_ = current_;
1152 lastValueHasAComment_ = false;
1153 lastValue_ = ¤tValue();
1154 }
1155
1156 return successful;
1157 }
1158
skipCommentTokens(Token & token)1159 void OurReader::skipCommentTokens(Token& token) {
1160 if (features_.allowComments_) {
1161 do {
1162 readToken(token);
1163 } while (token.type_ == tokenComment);
1164 } else {
1165 readToken(token);
1166 }
1167 }
1168
readToken(Token & token)1169 bool OurReader::readToken(Token& token) {
1170 skipSpaces();
1171 token.start_ = current_;
1172 Char c = getNextChar();
1173 bool ok = true;
1174 switch (c) {
1175 case '{':
1176 token.type_ = tokenObjectBegin;
1177 break;
1178 case '}':
1179 token.type_ = tokenObjectEnd;
1180 break;
1181 case '[':
1182 token.type_ = tokenArrayBegin;
1183 break;
1184 case ']':
1185 token.type_ = tokenArrayEnd;
1186 break;
1187 case '"':
1188 token.type_ = tokenString;
1189 ok = readString();
1190 break;
1191 case '\'':
1192 if (features_.allowSingleQuotes_) {
1193 token.type_ = tokenString;
1194 ok = readStringSingleQuote();
1195 break;
1196 } // else fall through
1197 case '/':
1198 token.type_ = tokenComment;
1199 ok = readComment();
1200 break;
1201 case '0':
1202 case '1':
1203 case '2':
1204 case '3':
1205 case '4':
1206 case '5':
1207 case '6':
1208 case '7':
1209 case '8':
1210 case '9':
1211 token.type_ = tokenNumber;
1212 readNumber(false);
1213 break;
1214 case '-':
1215 if (readNumber(true)) {
1216 token.type_ = tokenNumber;
1217 } else {
1218 token.type_ = tokenNegInf;
1219 ok = features_.allowSpecialFloats_ && match("nfinity", 7);
1220 }
1221 break;
1222 case '+':
1223 if (readNumber(true)) {
1224 token.type_ = tokenNumber;
1225 } else {
1226 token.type_ = tokenPosInf;
1227 ok = features_.allowSpecialFloats_ && match("nfinity", 7);
1228 }
1229 break;
1230 case 't':
1231 token.type_ = tokenTrue;
1232 ok = match("rue", 3);
1233 break;
1234 case 'f':
1235 token.type_ = tokenFalse;
1236 ok = match("alse", 4);
1237 break;
1238 case 'n':
1239 token.type_ = tokenNull;
1240 ok = match("ull", 3);
1241 break;
1242 case 'N':
1243 if (features_.allowSpecialFloats_) {
1244 token.type_ = tokenNaN;
1245 ok = match("aN", 2);
1246 } else {
1247 ok = false;
1248 }
1249 break;
1250 case 'I':
1251 if (features_.allowSpecialFloats_) {
1252 token.type_ = tokenPosInf;
1253 ok = match("nfinity", 7);
1254 } else {
1255 ok = false;
1256 }
1257 break;
1258 case ',':
1259 token.type_ = tokenArraySeparator;
1260 break;
1261 case ':':
1262 token.type_ = tokenMemberSeparator;
1263 break;
1264 case 0:
1265 token.type_ = tokenEndOfStream;
1266 break;
1267 default:
1268 ok = false;
1269 break;
1270 }
1271 if (!ok)
1272 token.type_ = tokenError;
1273 token.end_ = current_;
1274 return ok;
1275 }
1276
skipSpaces()1277 void OurReader::skipSpaces() {
1278 while (current_ != end_) {
1279 Char c = *current_;
1280 if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
1281 ++current_;
1282 else
1283 break;
1284 }
1285 }
1286
skipBom(bool skipBom)1287 void OurReader::skipBom(bool skipBom) {
1288 // The default behavior is to skip BOM.
1289 if (skipBom) {
1290 if (strncmp(begin_, "\xEF\xBB\xBF", 3) == 0) {
1291 begin_ += 3;
1292 current_ = begin_;
1293 }
1294 }
1295 }
1296
match(const Char * pattern,int patternLength)1297 bool OurReader::match(const Char* pattern, int patternLength) {
1298 if (end_ - current_ < patternLength)
1299 return false;
1300 int index = patternLength;
1301 while (index--)
1302 if (current_[index] != pattern[index])
1303 return false;
1304 current_ += patternLength;
1305 return true;
1306 }
1307
readComment()1308 bool OurReader::readComment() {
1309 const Location commentBegin = current_ - 1;
1310 const Char c = getNextChar();
1311 bool successful = false;
1312 bool cStyleWithEmbeddedNewline = false;
1313
1314 const bool isCStyleComment = (c == '*');
1315 const bool isCppStyleComment = (c == '/');
1316 if (isCStyleComment) {
1317 successful = readCStyleComment(&cStyleWithEmbeddedNewline);
1318 } else if (isCppStyleComment) {
1319 successful = readCppStyleComment();
1320 }
1321
1322 if (!successful)
1323 return false;
1324
1325 if (collectComments_) {
1326 CommentPlacement placement = commentBefore;
1327
1328 if (!lastValueHasAComment_) {
1329 if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
1330 if (isCppStyleComment || !cStyleWithEmbeddedNewline) {
1331 placement = commentAfterOnSameLine;
1332 lastValueHasAComment_ = true;
1333 }
1334 }
1335 }
1336
1337 addComment(commentBegin, current_, placement);
1338 }
1339 return true;
1340 }
1341
normalizeEOL(OurReader::Location begin,OurReader::Location end)1342 String OurReader::normalizeEOL(OurReader::Location begin,
1343 OurReader::Location end) {
1344 String normalized;
1345 normalized.reserve(static_cast<size_t>(end - begin));
1346 OurReader::Location current = begin;
1347 while (current != end) {
1348 char c = *current++;
1349 if (c == '\r') {
1350 if (current != end && *current == '\n')
1351 // convert dos EOL
1352 ++current;
1353 // convert Mac EOL
1354 normalized += '\n';
1355 } else {
1356 normalized += c;
1357 }
1358 }
1359 return normalized;
1360 }
1361
addComment(Location begin,Location end,CommentPlacement placement)1362 void OurReader::addComment(Location begin, Location end,
1363 CommentPlacement placement) {
1364 assert(collectComments_);
1365 const String& normalized = normalizeEOL(begin, end);
1366 if (placement == commentAfterOnSameLine) {
1367 assert(lastValue_ != JSONCPP_NULL);
1368 lastValue_->setComment(normalized, placement);
1369 } else {
1370 commentsBefore_ += normalized;
1371 }
1372 }
1373
readCStyleComment(bool * containsNewLineResult)1374 bool OurReader::readCStyleComment(bool* containsNewLineResult) {
1375 *containsNewLineResult = false;
1376
1377 while ((current_ + 1) < end_) {
1378 Char c = getNextChar();
1379 if (c == '*' && *current_ == '/')
1380 break;
1381 if (c == '\n')
1382 *containsNewLineResult = true;
1383 }
1384
1385 return getNextChar() == '/';
1386 }
1387
readCppStyleComment()1388 bool OurReader::readCppStyleComment() {
1389 while (current_ != end_) {
1390 Char c = getNextChar();
1391 if (c == '\n')
1392 break;
1393 if (c == '\r') {
1394 // Consume DOS EOL. It will be normalized in addComment.
1395 if (current_ != end_ && *current_ == '\n')
1396 getNextChar();
1397 // Break on Moc OS 9 EOL.
1398 break;
1399 }
1400 }
1401 return true;
1402 }
1403
readNumber(bool checkInf)1404 bool OurReader::readNumber(bool checkInf) {
1405 Location p = current_;
1406 if (checkInf && p != end_ && *p == 'I') {
1407 current_ = ++p;
1408 return false;
1409 }
1410 char c = '0'; // stopgap for already consumed character
1411 // integral part
1412 while (c >= '0' && c <= '9')
1413 c = (current_ = p) < end_ ? *p++ : '\0';
1414 // fractional part
1415 if (c == '.') {
1416 c = (current_ = p) < end_ ? *p++ : '\0';
1417 while (c >= '0' && c <= '9')
1418 c = (current_ = p) < end_ ? *p++ : '\0';
1419 }
1420 // exponential part
1421 if (c == 'e' || c == 'E') {
1422 c = (current_ = p) < end_ ? *p++ : '\0';
1423 if (c == '+' || c == '-')
1424 c = (current_ = p) < end_ ? *p++ : '\0';
1425 while (c >= '0' && c <= '9')
1426 c = (current_ = p) < end_ ? *p++ : '\0';
1427 }
1428 return true;
1429 }
readString()1430 bool OurReader::readString() {
1431 Char c = 0;
1432 while (current_ != end_) {
1433 c = getNextChar();
1434 if (c == '\\')
1435 getNextChar();
1436 else if (c == '"')
1437 break;
1438 }
1439 return c == '"';
1440 }
1441
readStringSingleQuote()1442 bool OurReader::readStringSingleQuote() {
1443 Char c = 0;
1444 while (current_ != end_) {
1445 c = getNextChar();
1446 if (c == '\\')
1447 getNextChar();
1448 else if (c == '\'')
1449 break;
1450 }
1451 return c == '\'';
1452 }
1453
readObject(Token & token)1454 bool OurReader::readObject(Token& token) {
1455 Token tokenName;
1456 String name;
1457 Value init(objectValue);
1458 currentValue().swapPayload(init);
1459 currentValue().setOffsetStart(token.start_ - begin_);
1460 while (readToken(tokenName)) {
1461 bool initialTokenOk = true;
1462 while (tokenName.type_ == tokenComment && initialTokenOk)
1463 initialTokenOk = readToken(tokenName);
1464 if (!initialTokenOk)
1465 break;
1466 if (tokenName.type_ == tokenObjectEnd &&
1467 (name.empty() ||
1468 features_.allowTrailingCommas_)) // empty object or trailing comma
1469 return true;
1470 name.clear();
1471 if (tokenName.type_ == tokenString) {
1472 if (!decodeString(tokenName, name))
1473 return recoverFromError(tokenObjectEnd);
1474 } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
1475 Value numberName;
1476 if (!decodeNumber(tokenName, numberName))
1477 return recoverFromError(tokenObjectEnd);
1478 name = numberName.asString();
1479 } else {
1480 break;
1481 }
1482 if (name.length() >= (1U << 30))
1483 throwRuntimeError("keylength >= 2^30");
1484 if (features_.rejectDupKeys_ && currentValue().isMember(name)) {
1485 String msg = "Duplicate key: '" + name + "'";
1486 return addErrorAndRecover(msg, tokenName, tokenObjectEnd);
1487 }
1488
1489 Token colon;
1490 if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
1491 return addErrorAndRecover("Missing ':' after object member name", colon,
1492 tokenObjectEnd);
1493 }
1494 Value& value = currentValue()[name];
1495 nodes_.push(&value);
1496 bool ok = readValue();
1497 nodes_.pop();
1498 if (!ok) // error already set
1499 return recoverFromError(tokenObjectEnd);
1500
1501 Token comma;
1502 if (!readToken(comma) ||
1503 (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
1504 comma.type_ != tokenComment)) {
1505 return addErrorAndRecover("Missing ',' or '}' in object declaration",
1506 comma, tokenObjectEnd);
1507 }
1508 bool finalizeTokenOk = true;
1509 while (comma.type_ == tokenComment && finalizeTokenOk)
1510 finalizeTokenOk = readToken(comma);
1511 if (comma.type_ == tokenObjectEnd)
1512 return true;
1513 }
1514 return addErrorAndRecover("Missing '}' or object member name", tokenName,
1515 tokenObjectEnd);
1516 }
1517
readArray(Token & token)1518 bool OurReader::readArray(Token& token) {
1519 Value init(arrayValue);
1520 currentValue().swapPayload(init);
1521 currentValue().setOffsetStart(token.start_ - begin_);
1522 int index = 0;
1523 for (;;) {
1524 skipSpaces();
1525 if (current_ != end_ && *current_ == ']' &&
1526 (index == 0 ||
1527 (features_.allowTrailingCommas_ &&
1528 !features_.allowDroppedNullPlaceholders_))) // empty array or trailing
1529 // comma
1530 {
1531 Token endArray;
1532 readToken(endArray);
1533 return true;
1534 }
1535 Value& value = currentValue()[index++];
1536 nodes_.push(&value);
1537 bool ok = readValue();
1538 nodes_.pop();
1539 if (!ok) // error already set
1540 return recoverFromError(tokenArrayEnd);
1541
1542 Token currentToken;
1543 // Accept Comment after last item in the array.
1544 ok = readToken(currentToken);
1545 while (currentToken.type_ == tokenComment && ok) {
1546 ok = readToken(currentToken);
1547 }
1548 bool badTokenType = (currentToken.type_ != tokenArraySeparator &&
1549 currentToken.type_ != tokenArrayEnd);
1550 if (!ok || badTokenType) {
1551 return addErrorAndRecover("Missing ',' or ']' in array declaration",
1552 currentToken, tokenArrayEnd);
1553 }
1554 if (currentToken.type_ == tokenArrayEnd)
1555 break;
1556 }
1557 return true;
1558 }
1559
decodeNumber(Token & token)1560 bool OurReader::decodeNumber(Token& token) {
1561 Value decoded;
1562 if (!decodeNumber(token, decoded))
1563 return false;
1564 currentValue().swapPayload(decoded);
1565 currentValue().setOffsetStart(token.start_ - begin_);
1566 currentValue().setOffsetLimit(token.end_ - begin_);
1567 return true;
1568 }
1569
decodeNumber(Token & token,Value & decoded)1570 bool OurReader::decodeNumber(Token& token, Value& decoded) {
1571 // Attempts to parse the number as an integer. If the number is
1572 // larger than the maximum supported value of an integer then
1573 // we decode the number as a double.
1574 Location current = token.start_;
1575 const bool isNegative = *current == '-';
1576 if (isNegative) {
1577 ++current;
1578 }
1579
1580 // We assume we can represent the largest and smallest integer types as
1581 // unsigned integers with separate sign. This is only true if they can fit
1582 // into an unsigned integer.
1583 JSONCPP_STATIC_ASSERT(LargestUInt(Value::maxLargestInt) <=
1584 Value::maxLargestUInt,
1585 "Int must be smaller than Uint");
1586 // We need to convert minLargestInt into a positive number. The easiest way
1587 // to do this conversion is to assume our "threshold" value of minLargestInt
1588 // divided by 10 can fit in maxLargestInt when absolute valued. This should
1589 // be a safe assumption.
1590 JSONCPP_STATIC_ASSERT(
1591 Value::minLargestInt <= -Value::maxLargestInt,
1592 "The absolute value of minLargestInt must ve greater than or"
1593 "equal to maxLargestInt");
1594
1595 JSONCPP_STATIC_ASSERT(
1596 Value::minLargestInt / 10 >= -Value::maxLargestInt,
1597 "The absolute value of minLargestInt must be only 1 magnitude"
1598 "larger than maxLargestInt");
1599
1600 static JSONCPP_CONST Value::LargestUInt positive_threshold =
1601 Value::maxLargestUInt / 10;
1602 static JSONCPP_CONST Value::UInt positive_last_digit =
1603 Value::maxLargestUInt % 10;
1604
1605 // For the negative values, we have to be more careful. Since typically
1606 // -Value::minLargestInt will cause an overflow, we first divide by 10 and
1607 // then take the inverse. This assumes that minLargestInt is only a single
1608 // power of 10 different in magnitude, which we check above. For the last
1609 // digit, we take the modulus before negating for the same reason.
1610 static JSONCPP_CONST Value::LargestUInt negative_threshold =
1611 Value::LargestUInt(-(Value::minLargestInt / 10));
1612 static JSONCPP_CONST Value::UInt negative_last_digit =
1613 Value::UInt(-(Value::minLargestInt % 10));
1614
1615 const Value::LargestUInt threshold =
1616 isNegative ? negative_threshold : positive_threshold;
1617 const Value::UInt max_last_digit =
1618 isNegative ? negative_last_digit : positive_last_digit;
1619
1620 Value::LargestUInt value = 0;
1621 while (current < token.end_) {
1622 Char c = *current++;
1623 if (c < '0' || c > '9')
1624 return decodeDouble(token, decoded);
1625
1626 const Value::UInt digit(static_cast<Value::UInt>(c - '0'));
1627 if (value >= threshold) {
1628 // We've hit or exceeded the max value divided by 10 (rounded down). If
1629 // a) we've only just touched the limit, meaing value == threshold,
1630 // b) this is the last digit, or
1631 // c) it's small enough to fit in that rounding delta, we're okay.
1632 // Otherwise treat this number as a double to avoid overflow.
1633 if (value > threshold || current != token.end_ ||
1634 digit > max_last_digit) {
1635 return decodeDouble(token, decoded);
1636 }
1637 }
1638 value = value * 10 + digit;
1639 }
1640
1641 if (isNegative) {
1642 // We use the same magnitude assumption here, just in case.
1643 const Value::UInt last_digit = static_cast<Value::UInt>(value % 10);
1644 decoded = -Value::LargestInt(value / 10) * 10 - last_digit;
1645 } else if (value <= Value::LargestUInt(Value::maxLargestInt)) {
1646 decoded = Value::LargestInt(value);
1647 } else {
1648 decoded = value;
1649 }
1650
1651 return true;
1652 }
1653
decodeDouble(Token & token)1654 bool OurReader::decodeDouble(Token& token) {
1655 Value decoded;
1656 if (!decodeDouble(token, decoded))
1657 return false;
1658 currentValue().swapPayload(decoded);
1659 currentValue().setOffsetStart(token.start_ - begin_);
1660 currentValue().setOffsetLimit(token.end_ - begin_);
1661 return true;
1662 }
1663
decodeDouble(Token & token,Value & decoded)1664 bool OurReader::decodeDouble(Token& token, Value& decoded) {
1665 double value = 0;
1666 const String buffer(token.start_, token.end_);
1667 IStringStream is(buffer);
1668 if (!(is >> value)) {
1669 // the value could be lower than numeric_limits<double>::min(), in this situtation we should return the value with the gurantee
1670 // of conversion which has been performed and no occurances of range error.
1671 if ((value > 0 && value < std::numeric_limits<double>::min()) || (value < 0 && value > -std::numeric_limits<double>::min())) {
1672 decoded = value;
1673 return true;
1674 }
1675 return addError(
1676 "'" + String(token.start_, token.end_) + "' is not a number.", token);
1677 }
1678 decoded = value;
1679 return true;
1680 }
1681
decodeString(Token & token)1682 bool OurReader::decodeString(Token& token) {
1683 String decoded_string;
1684 if (!decodeString(token, decoded_string))
1685 return false;
1686 Value decoded(decoded_string);
1687 currentValue().swapPayload(decoded);
1688 currentValue().setOffsetStart(token.start_ - begin_);
1689 currentValue().setOffsetLimit(token.end_ - begin_);
1690 return true;
1691 }
1692
decodeString(Token & token,String & decoded)1693 bool OurReader::decodeString(Token& token, String& decoded) {
1694 decoded.reserve(static_cast<size_t>(token.end_ - token.start_ - 2));
1695 Location current = token.start_ + 1; // skip '"'
1696 Location end = token.end_ - 1; // do not include '"'
1697 while (current != end) {
1698 Char c = *current++;
1699 if (c == '"')
1700 break;
1701 if (c == '\\') {
1702 if (current == end)
1703 return addError("Empty escape sequence in string", token, current);
1704 Char escape = *current++;
1705 switch (escape) {
1706 case '"':
1707 decoded += '"';
1708 break;
1709 case '/':
1710 decoded += '/';
1711 break;
1712 case '\\':
1713 decoded += '\\';
1714 break;
1715 case 'b':
1716 decoded += '\b';
1717 break;
1718 case 'f':
1719 decoded += '\f';
1720 break;
1721 case 'n':
1722 decoded += '\n';
1723 break;
1724 case 'r':
1725 decoded += '\r';
1726 break;
1727 case 't':
1728 decoded += '\t';
1729 break;
1730 case 'u': {
1731 unsigned int unicode;
1732 if (!decodeUnicodeCodePoint(token, current, end, unicode))
1733 return false;
1734 decoded += codePointToUTF8(unicode);
1735 } break;
1736 default:
1737 return addError("Bad escape sequence in string", token, current);
1738 }
1739 } else {
1740 decoded += c;
1741 }
1742 }
1743 return true;
1744 }
1745
decodeUnicodeCodePoint(Token & token,Location & current,Location end,unsigned int & unicode)1746 bool OurReader::decodeUnicodeCodePoint(Token& token, Location& current,
1747 Location end, unsigned int& unicode) {
1748
1749 if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
1750 return false;
1751 if (unicode >= 0xD800 && unicode <= 0xDBFF) {
1752 // surrogate pairs
1753 if (end - current < 6)
1754 return addError(
1755 "additional six characters expected to parse unicode surrogate pair.",
1756 token, current);
1757 if (*(current++) == '\\' && *(current++) == 'u') {
1758 unsigned int surrogatePair;
1759 if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
1760 unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
1761 } else
1762 return false;
1763 } else
1764 return addError("expecting another \\u token to begin the second half of "
1765 "a unicode surrogate pair",
1766 token, current);
1767 }
1768 return true;
1769 }
1770
decodeUnicodeEscapeSequence(Token & token,Location & current,Location end,unsigned int & ret_unicode)1771 bool OurReader::decodeUnicodeEscapeSequence(Token& token, Location& current,
1772 Location end,
1773 unsigned int& ret_unicode) {
1774 if (end - current < 4)
1775 return addError(
1776 "Bad unicode escape sequence in string: four digits expected.", token,
1777 current);
1778 int unicode = 0;
1779 for (int index = 0; index < 4; ++index) {
1780 Char c = *current++;
1781 unicode *= 16;
1782 if (c >= '0' && c <= '9')
1783 unicode += c - '0';
1784 else if (c >= 'a' && c <= 'f')
1785 unicode += c - 'a' + 10;
1786 else if (c >= 'A' && c <= 'F')
1787 unicode += c - 'A' + 10;
1788 else
1789 return addError(
1790 "Bad unicode escape sequence in string: hexadecimal digit expected.",
1791 token, current);
1792 }
1793 ret_unicode = static_cast<unsigned int>(unicode);
1794 return true;
1795 }
1796
addError(const String & message,Token & token,Location extra)1797 bool OurReader::addError(const String& message, Token& token, Location extra) {
1798 ErrorInfo info;
1799 info.token_ = token;
1800 info.message_ = message;
1801 info.extra_ = extra;
1802 errors_.push_back(info);
1803 return false;
1804 }
1805
recoverFromError(TokenType skipUntilToken)1806 bool OurReader::recoverFromError(TokenType skipUntilToken) {
1807 size_t errorCount = errors_.size();
1808 Token skip;
1809 for (;;) {
1810 if (!readToken(skip))
1811 errors_.resize(errorCount); // discard errors caused by recovery
1812 if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
1813 break;
1814 }
1815 errors_.resize(errorCount);
1816 return false;
1817 }
1818
addErrorAndRecover(const String & message,Token & token,TokenType skipUntilToken)1819 bool OurReader::addErrorAndRecover(const String& message, Token& token,
1820 TokenType skipUntilToken) {
1821 addError(message, token);
1822 return recoverFromError(skipUntilToken);
1823 }
1824
currentValue()1825 Value& OurReader::currentValue() { return *(nodes_.top()); }
1826
getNextChar()1827 OurReader::Char OurReader::getNextChar() {
1828 if (current_ == end_)
1829 return 0;
1830 return *current_++;
1831 }
1832
getLocationLineAndColumn(Location location,int & line,int & column) const1833 void OurReader::getLocationLineAndColumn(Location location, int& line,
1834 int& column) const {
1835 Location current = begin_;
1836 Location lastLineStart = current;
1837 line = 0;
1838 while (current < location && current != end_) {
1839 Char c = *current++;
1840 if (c == '\r') {
1841 if (*current == '\n')
1842 ++current;
1843 lastLineStart = current;
1844 ++line;
1845 } else if (c == '\n') {
1846 lastLineStart = current;
1847 ++line;
1848 }
1849 }
1850 // column & line start at 1
1851 column = int(location - lastLineStart) + 1;
1852 ++line;
1853 }
1854
getLocationLineAndColumn(Location location) const1855 String OurReader::getLocationLineAndColumn(Location location) const {
1856 int line, column;
1857 getLocationLineAndColumn(location, line, column);
1858 char buffer[18 + 16 + 16 + 1];
1859 jsoncpp_snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
1860 return buffer;
1861 }
1862
getFormattedErrorMessages() const1863 String OurReader::getFormattedErrorMessages() const {
1864 String formattedMessage;
1865 for (Errors::const_iterator itError = errors_.begin();
1866 itError != errors_.end(); ++itError) {
1867 const ErrorInfo& error = *itError;
1868 formattedMessage +=
1869 "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
1870 formattedMessage += " " + error.message_ + "\n";
1871 if (error.extra_)
1872 formattedMessage +=
1873 "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
1874 }
1875 return formattedMessage;
1876 }
1877
getStructuredErrors() const1878 std::vector<OurReader::StructuredError> OurReader::getStructuredErrors() const {
1879 std::vector<OurReader::StructuredError> allErrors;
1880 for (Errors::const_iterator itError = errors_.begin();
1881 itError != errors_.end(); ++itError) {
1882 const ErrorInfo& error = *itError;
1883 OurReader::StructuredError structured;
1884 structured.offset_start = error.token_.start_ - begin_;
1885 structured.offset_limit = error.token_.end_ - begin_;
1886 structured.message = error.message_;
1887 allErrors.push_back(structured);
1888 }
1889 return allErrors;
1890 }
1891
1892 class OurCharReader : public CharReader {
1893 bool const collectComments_;
1894 OurReader reader_;
1895
1896 public:
OurCharReader(bool collectComments,OurFeatures const & features)1897 OurCharReader(bool collectComments, OurFeatures const& features)
1898 : collectComments_(collectComments), reader_(features) {}
parse(char const * beginDoc,char const * endDoc,Value * root,String * errs)1899 bool parse(char const* beginDoc, char const* endDoc, Value* root,
1900 String* errs) JSONCPP_OVERRIDE {
1901 bool ok = reader_.parse(beginDoc, endDoc, *root, collectComments_);
1902 if (errs) {
1903 *errs = reader_.getFormattedErrorMessages();
1904 }
1905 return ok;
1906 }
1907 };
1908
CharReaderBuilder()1909 CharReaderBuilder::CharReaderBuilder() { setDefaults(&settings_); }
~CharReaderBuilder()1910 CharReaderBuilder::~CharReaderBuilder() {}
newCharReader() const1911 CharReader* CharReaderBuilder::newCharReader() const {
1912 bool collectComments = settings_["collectComments"].asBool();
1913 OurFeatures features = OurFeatures::all();
1914 features.allowComments_ = settings_["allowComments"].asBool();
1915 features.allowTrailingCommas_ = settings_["allowTrailingCommas"].asBool();
1916 features.strictRoot_ = settings_["strictRoot"].asBool();
1917 features.allowDroppedNullPlaceholders_ =
1918 settings_["allowDroppedNullPlaceholders"].asBool();
1919 features.allowNumericKeys_ = settings_["allowNumericKeys"].asBool();
1920 features.allowSingleQuotes_ = settings_["allowSingleQuotes"].asBool();
1921
1922 // Stack limit is always a size_t, so we get this as an unsigned int
1923 // regardless of it we have 64-bit integer support enabled.
1924 features.stackLimit_ = static_cast<size_t>(settings_["stackLimit"].asUInt());
1925 features.failIfExtra_ = settings_["failIfExtra"].asBool();
1926 features.rejectDupKeys_ = settings_["rejectDupKeys"].asBool();
1927 features.allowSpecialFloats_ = settings_["allowSpecialFloats"].asBool();
1928 features.skipBom_ = settings_["skipBom"].asBool();
1929 return new OurCharReader(collectComments, features);
1930 }
getValidReaderKeys(std::set<String> * valid_keys)1931 static void getValidReaderKeys(std::set<String>* valid_keys) {
1932 valid_keys->clear();
1933 valid_keys->insert("collectComments");
1934 valid_keys->insert("allowComments");
1935 valid_keys->insert("allowTrailingCommas");
1936 valid_keys->insert("strictRoot");
1937 valid_keys->insert("allowDroppedNullPlaceholders");
1938 valid_keys->insert("allowNumericKeys");
1939 valid_keys->insert("allowSingleQuotes");
1940 valid_keys->insert("stackLimit");
1941 valid_keys->insert("failIfExtra");
1942 valid_keys->insert("rejectDupKeys");
1943 valid_keys->insert("allowSpecialFloats");
1944 valid_keys->insert("skipBom");
1945 }
validate(Json::Value * invalid) const1946 bool CharReaderBuilder::validate(Json::Value* invalid) const {
1947 Json::Value my_invalid;
1948 if (!invalid)
1949 invalid = &my_invalid; // so we do not need to test for NULL
1950 Json::Value& inv = *invalid;
1951 std::set<String> valid_keys;
1952 getValidReaderKeys(&valid_keys);
1953 Value::Members keys = settings_.getMemberNames();
1954 size_t n = keys.size();
1955 for (size_t i = 0; i < n; ++i) {
1956 String const& key = keys[i];
1957 if (valid_keys.find(key) == valid_keys.end()) {
1958 inv[key] = settings_[key];
1959 }
1960 }
1961 return inv.empty();
1962 }
operator [](const String & key)1963 Value& CharReaderBuilder::operator[](const String& key) {
1964 return settings_[key];
1965 }
1966 // static
strictMode(Json::Value * settings)1967 void CharReaderBuilder::strictMode(Json::Value* settings) {
1968 //! [CharReaderBuilderStrictMode]
1969 (*settings)["allowComments"] = false;
1970 (*settings)["allowTrailingCommas"] = false;
1971 (*settings)["strictRoot"] = true;
1972 (*settings)["allowDroppedNullPlaceholders"] = false;
1973 (*settings)["allowNumericKeys"] = false;
1974 (*settings)["allowSingleQuotes"] = false;
1975 (*settings)["stackLimit"] = 1000;
1976 (*settings)["failIfExtra"] = true;
1977 (*settings)["rejectDupKeys"] = true;
1978 (*settings)["allowSpecialFloats"] = false;
1979 (*settings)["skipBom"] = true;
1980 //! [CharReaderBuilderStrictMode]
1981 }
1982 // static
setDefaults(Json::Value * settings)1983 void CharReaderBuilder::setDefaults(Json::Value* settings) {
1984 //! [CharReaderBuilderDefaults]
1985 (*settings)["collectComments"] = true;
1986 (*settings)["allowComments"] = true;
1987 (*settings)["allowTrailingCommas"] = true;
1988 (*settings)["strictRoot"] = false;
1989 (*settings)["allowDroppedNullPlaceholders"] = false;
1990 (*settings)["allowNumericKeys"] = false;
1991 (*settings)["allowSingleQuotes"] = false;
1992 (*settings)["stackLimit"] = 1000;
1993 (*settings)["failIfExtra"] = false;
1994 (*settings)["rejectDupKeys"] = false;
1995 (*settings)["allowSpecialFloats"] = false;
1996 (*settings)["skipBom"] = true;
1997 //! [CharReaderBuilderDefaults]
1998 }
1999
2000 //////////////////////////////////
2001 // global functions
2002
parseFromStream(CharReader::Factory const & fact,IStream & sin,Value * root,String * errs)2003 bool parseFromStream(CharReader::Factory const& fact, IStream& sin, Value* root,
2004 String* errs) {
2005 OStringStream ssin;
2006 ssin << sin.rdbuf();
2007 String doc = ssin.str();
2008 char const* begin = doc.data();
2009 char const* end = begin + doc.size();
2010 // Note that we do not actually need a null-terminator.
2011 CharReaderPtr const reader(fact.newCharReader());
2012 bool ret = reader->parse(begin, end, root, errs);
2013 delete reader;
2014 return ret;
2015 }
2016
operator >>(IStream & sin,Value & root)2017 IStream& operator>>(IStream& sin, Value& root) {
2018 CharReaderBuilder b;
2019 String errs;
2020 bool ok = parseFromStream(b, sin, &root, &errs);
2021 if (!ok) {
2022 throwRuntimeError(errs);
2023 }
2024 return sin;
2025 }
2026
2027 } // namespace Json
2028