1 // Copyright 2007-2011 Baptiste Lepilleur and The JsonCpp Authors
2 // Copyright (C) 2016 InfoTeCS JSC. All rights reserved.
3 // Distributed under MIT license, or public domain if desired and
4 // recognized in your jurisdiction.
5 // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE
6
7 #if !defined(JSON_IS_AMALGAMATION)
8 #include "json_tool.h"
9 #include <json/assertions.h>
10 #include <json/reader.h>
11 #include <json/value.h>
12 #endif // if !defined(JSON_IS_AMALGAMATION)
13 #include <cassert>
14 #include <cstring>
15 #include <iostream>
16 #include <istream>
17 #include <limits>
18 #include <memory>
19 #include <set>
20 #include <sstream>
21 #include <utility>
22
23 #include <cstdio>
24 #if __cplusplus >= 201103L
25
26 #if !defined(sscanf)
27 #define sscanf std::sscanf
28 #endif
29
30 #endif //__cplusplus
31
32 #if defined(_MSC_VER)
33 #if !defined(_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES)
34 #define _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES 1
35 #endif //_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES
36 #endif //_MSC_VER
37
38 #if defined(_MSC_VER)
39 // Disable warning about strdup being deprecated.
40 #pragma warning(disable : 4996)
41 #endif
42
43 // Define JSONCPP_DEPRECATED_STACK_LIMIT as an appropriate integer at compile
44 // time to change the stack limit
45 #if !defined(JSONCPP_DEPRECATED_STACK_LIMIT)
46 #define JSONCPP_DEPRECATED_STACK_LIMIT 1000
47 #endif
48
49 static size_t const stackLimit_g =
50 JSONCPP_DEPRECATED_STACK_LIMIT; // see readValue()
51
52 namespace Json {
53
54 typedef CharReader* CharReaderPtr;
55
56 // Implementation of class Features
57 // ////////////////////////////////
58
Features()59 Features::Features()
60 : allowComments_(true), strictRoot_(false),
61 allowDroppedNullPlaceholders_(false), allowNumericKeys_(false) {}
all()62 Features Features::all() { return Features(); }
63
strictMode()64 Features Features::strictMode() {
65 Features features;
66 features.allowComments_ = false;
67 features.strictRoot_ = true;
68 features.allowDroppedNullPlaceholders_ = false;
69 features.allowNumericKeys_ = false;
70 return features;
71 }
72
73 // Implementation of class Reader
74 // ////////////////////////////////
75
containsNewLine(Reader::Location begin,Reader::Location end)76 bool Reader::containsNewLine(Reader::Location begin, Reader::Location end) {
77 for (; begin < end; ++begin)
78 if (*begin == '\n' || *begin == '\r')
79 return true;
80 return false;
81 }
82
83 // Class Reader
84 // //////////////////////////////////////////////////////////////////
85
Reader()86 Reader::Reader()
87 : errors_(), document_(), begin_(), end_(), current_(), lastValueEnd_(),
88 lastValue_(), commentsBefore_(), features_(Features::all()),
89 collectComments_() {}
90
Reader(const Features & features)91 Reader::Reader(const Features& features)
92 : errors_(), document_(), begin_(), end_(), current_(), lastValueEnd_(),
93 lastValue_(), commentsBefore_(), features_(features), collectComments_() {
94 }
95
parse(const std::string & document,Value & root,bool collectComments)96 bool Reader::parse(const std::string& document, Value& root,
97 bool collectComments) {
98 document_.assign(document.begin(), document.end());
99 const char* begin = document_.c_str();
100 const char* end = begin + document_.length();
101 return parse(begin, end, root, collectComments);
102 }
103
parse(std::istream & is,Value & root,bool collectComments)104 bool Reader::parse(std::istream& is, Value& root, bool collectComments) {
105 // std::istream_iterator<char> begin(is);
106 // std::istream_iterator<char> end;
107 // Those would allow streamed input from a file, if parse() were a
108 // template function.
109
110 // Since String is reference-counted, this at least does not
111 // create an extra copy.
112 String doc;
113 std::getline(is, doc, static_cast<char> EOF);
114 return parse(doc.data(), doc.data() + doc.size(), root, collectComments);
115 }
116
parse(const char * beginDoc,const char * endDoc,Value & root,bool collectComments)117 bool Reader::parse(const char* beginDoc, const char* endDoc, Value& root,
118 bool collectComments) {
119 if (!features_.allowComments_) {
120 collectComments = false;
121 }
122
123 begin_ = beginDoc;
124 end_ = endDoc;
125 collectComments_ = collectComments;
126 current_ = begin_;
127 lastValueEnd_ = JSONCPP_NULL;
128 lastValue_ = JSONCPP_NULL;
129 commentsBefore_.clear();
130 errors_.clear();
131 while (!nodes_.empty())
132 nodes_.pop();
133 nodes_.push(&root);
134
135 bool successful = readValue();
136 Token token;
137 skipCommentTokens(token);
138 if (collectComments_ && !commentsBefore_.empty())
139 root.setComment(commentsBefore_, commentAfter);
140 if (features_.strictRoot_) {
141 if (!root.isArray() && !root.isObject()) {
142 // Set error location to start of doc, ideally should be first token found
143 // in doc
144 token.type_ = tokenError;
145 token.start_ = beginDoc;
146 token.end_ = endDoc;
147 addError(
148 "A valid JSON document must be either an array or an object value.",
149 token);
150 return false;
151 }
152 }
153 return successful;
154 }
155
readValue()156 bool Reader::readValue() {
157 // readValue() may call itself only if it calls readObject() or ReadArray().
158 // These methods execute nodes_.push() just before and nodes_.pop)() just
159 // after calling readValue(). parse() executes one nodes_.push(), so > instead
160 // of >=.
161 if (nodes_.size() > stackLimit_g)
162 throwRuntimeError("Exceeded stackLimit in readValue().");
163
164 Token token;
165 skipCommentTokens(token);
166 bool successful = true;
167
168 if (collectComments_ && !commentsBefore_.empty()) {
169 currentValue().setComment(commentsBefore_, commentBefore);
170 commentsBefore_.clear();
171 }
172
173 switch (token.type_) {
174 case tokenObjectBegin:
175 successful = readObject(token);
176 currentValue().setOffsetLimit(current_ - begin_);
177 break;
178 case tokenArrayBegin:
179 successful = readArray(token);
180 currentValue().setOffsetLimit(current_ - begin_);
181 break;
182 case tokenNumber:
183 successful = decodeNumber(token);
184 break;
185 case tokenString:
186 successful = decodeString(token);
187 break;
188 case tokenTrue: {
189 Value v(true);
190 currentValue().swapPayload(v);
191 currentValue().setOffsetStart(token.start_ - begin_);
192 currentValue().setOffsetLimit(token.end_ - begin_);
193 } break;
194 case tokenFalse: {
195 Value v(false);
196 currentValue().swapPayload(v);
197 currentValue().setOffsetStart(token.start_ - begin_);
198 currentValue().setOffsetLimit(token.end_ - begin_);
199 } break;
200 case tokenNull: {
201 Value v;
202 currentValue().swapPayload(v);
203 currentValue().setOffsetStart(token.start_ - begin_);
204 currentValue().setOffsetLimit(token.end_ - begin_);
205 } break;
206 case tokenArraySeparator:
207 case tokenObjectEnd:
208 case tokenArrayEnd:
209 if (features_.allowDroppedNullPlaceholders_) {
210 // "Un-read" the current token and mark the current value as a null
211 // token.
212 current_--;
213 Value v;
214 currentValue().swapPayload(v);
215 currentValue().setOffsetStart(current_ - begin_ - 1);
216 currentValue().setOffsetLimit(current_ - begin_);
217 break;
218 } // Else, fall through...
219 default:
220 currentValue().setOffsetStart(token.start_ - begin_);
221 currentValue().setOffsetLimit(token.end_ - begin_);
222 return addError("Syntax error: value, object or array expected.", token);
223 }
224
225 if (collectComments_) {
226 lastValueEnd_ = current_;
227 lastValue_ = ¤tValue();
228 }
229
230 return successful;
231 }
232
skipCommentTokens(Token & token)233 void Reader::skipCommentTokens(Token& token) {
234 if (features_.allowComments_) {
235 do {
236 readToken(token);
237 } while (token.type_ == tokenComment);
238 } else {
239 readToken(token);
240 }
241 }
242
readToken(Token & token)243 bool Reader::readToken(Token& token) {
244 skipSpaces();
245 token.start_ = current_;
246 Char c = getNextChar();
247 bool ok = true;
248 switch (c) {
249 case '{':
250 token.type_ = tokenObjectBegin;
251 break;
252 case '}':
253 token.type_ = tokenObjectEnd;
254 break;
255 case '[':
256 token.type_ = tokenArrayBegin;
257 break;
258 case ']':
259 token.type_ = tokenArrayEnd;
260 break;
261 case '"':
262 token.type_ = tokenString;
263 ok = readString();
264 break;
265 case '/':
266 token.type_ = tokenComment;
267 ok = readComment();
268 break;
269 case '0':
270 case '1':
271 case '2':
272 case '3':
273 case '4':
274 case '5':
275 case '6':
276 case '7':
277 case '8':
278 case '9':
279 case '-':
280 token.type_ = tokenNumber;
281 readNumber();
282 break;
283 case 't':
284 token.type_ = tokenTrue;
285 ok = match("rue", 3);
286 break;
287 case 'f':
288 token.type_ = tokenFalse;
289 ok = match("alse", 4);
290 break;
291 case 'n':
292 token.type_ = tokenNull;
293 ok = match("ull", 3);
294 break;
295 case ',':
296 token.type_ = tokenArraySeparator;
297 break;
298 case ':':
299 token.type_ = tokenMemberSeparator;
300 break;
301 case 0:
302 token.type_ = tokenEndOfStream;
303 break;
304 default:
305 ok = false;
306 break;
307 }
308 if (!ok)
309 token.type_ = tokenError;
310 token.end_ = current_;
311 return ok;
312 }
313
skipSpaces()314 void Reader::skipSpaces() {
315 while (current_ != end_) {
316 Char c = *current_;
317 if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
318 ++current_;
319 else
320 break;
321 }
322 }
323
match(const Char * pattern,int patternLength)324 bool Reader::match(const Char* pattern, int patternLength) {
325 if (end_ - current_ < patternLength)
326 return false;
327 int index = patternLength;
328 while (index--)
329 if (current_[index] != pattern[index])
330 return false;
331 current_ += patternLength;
332 return true;
333 }
334
readComment()335 bool Reader::readComment() {
336 Location commentBegin = current_ - 1;
337 Char c = getNextChar();
338 bool successful = false;
339 if (c == '*')
340 successful = readCStyleComment();
341 else if (c == '/')
342 successful = readCppStyleComment();
343 if (!successful)
344 return false;
345
346 if (collectComments_) {
347 CommentPlacement placement = commentBefore;
348 if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
349 if (c != '*' || !containsNewLine(commentBegin, current_))
350 placement = commentAfterOnSameLine;
351 }
352
353 addComment(commentBegin, current_, placement);
354 }
355 return true;
356 }
357
normalizeEOL(Reader::Location begin,Reader::Location end)358 String Reader::normalizeEOL(Reader::Location begin, Reader::Location end) {
359 String normalized;
360 normalized.reserve(static_cast<size_t>(end - begin));
361 Reader::Location current = begin;
362 while (current != end) {
363 char c = *current++;
364 if (c == '\r') {
365 if (current != end && *current == '\n')
366 // convert dos EOL
367 ++current;
368 // convert Mac EOL
369 normalized += '\n';
370 } else {
371 normalized += c;
372 }
373 }
374 return normalized;
375 }
376
addComment(Location begin,Location end,CommentPlacement placement)377 void Reader::addComment(Location begin, Location end,
378 CommentPlacement placement) {
379 assert(collectComments_);
380 const String& normalized = normalizeEOL(begin, end);
381 if (placement == commentAfterOnSameLine) {
382 assert(lastValue_ != JSONCPP_NULL);
383 lastValue_->setComment(normalized, placement);
384 } else {
385 commentsBefore_ += normalized;
386 }
387 }
388
readCStyleComment()389 bool Reader::readCStyleComment() {
390 while ((current_ + 1) < end_) {
391 Char c = getNextChar();
392 if (c == '*' && *current_ == '/')
393 break;
394 }
395 return getNextChar() == '/';
396 }
397
readCppStyleComment()398 bool Reader::readCppStyleComment() {
399 while (current_ != end_) {
400 Char c = getNextChar();
401 if (c == '\n')
402 break;
403 if (c == '\r') {
404 // Consume DOS EOL. It will be normalized in addComment.
405 if (current_ != end_ && *current_ == '\n')
406 getNextChar();
407 // Break on Moc OS 9 EOL.
408 break;
409 }
410 }
411 return true;
412 }
413
readNumber()414 void Reader::readNumber() {
415 Location p = current_;
416 char c = '0'; // stopgap for already consumed character
417 // integral part
418 while (c >= '0' && c <= '9')
419 c = (current_ = p) < end_ ? *p++ : '\0';
420 // fractional part
421 if (c == '.') {
422 c = (current_ = p) < end_ ? *p++ : '\0';
423 while (c >= '0' && c <= '9')
424 c = (current_ = p) < end_ ? *p++ : '\0';
425 }
426 // exponential part
427 if (c == 'e' || c == 'E') {
428 c = (current_ = p) < end_ ? *p++ : '\0';
429 if (c == '+' || c == '-')
430 c = (current_ = p) < end_ ? *p++ : '\0';
431 while (c >= '0' && c <= '9')
432 c = (current_ = p) < end_ ? *p++ : '\0';
433 }
434 }
435
readString()436 bool Reader::readString() {
437 Char c = '\0';
438 while (current_ != end_) {
439 c = getNextChar();
440 if (c == '\\')
441 getNextChar();
442 else if (c == '"')
443 break;
444 }
445 return c == '"';
446 }
447
readObject(Token & token)448 bool Reader::readObject(Token& token) {
449 Token tokenName;
450 String name;
451 Value init(objectValue);
452 currentValue().swapPayload(init);
453 currentValue().setOffsetStart(token.start_ - begin_);
454 while (readToken(tokenName)) {
455 bool initialTokenOk = true;
456 while (tokenName.type_ == tokenComment && initialTokenOk)
457 initialTokenOk = readToken(tokenName);
458 if (!initialTokenOk)
459 break;
460 if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object
461 return true;
462 name.clear();
463 if (tokenName.type_ == tokenString) {
464 if (!decodeString(tokenName, name))
465 return recoverFromError(tokenObjectEnd);
466 } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
467 Value numberName;
468 if (!decodeNumber(tokenName, numberName))
469 return recoverFromError(tokenObjectEnd);
470 name = numberName.asString();
471 } else {
472 break;
473 }
474
475 Token colon;
476 if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
477 return addErrorAndRecover("Missing ':' after object member name", colon,
478 tokenObjectEnd);
479 }
480 Value& value = currentValue()[name];
481 nodes_.push(&value);
482 bool ok = readValue();
483 nodes_.pop();
484 if (!ok) // error already set
485 return recoverFromError(tokenObjectEnd);
486
487 Token comma;
488 if (!readToken(comma) ||
489 (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
490 comma.type_ != tokenComment)) {
491 return addErrorAndRecover("Missing ',' or '}' in object declaration",
492 comma, tokenObjectEnd);
493 }
494 bool finalizeTokenOk = true;
495 while (comma.type_ == tokenComment && finalizeTokenOk)
496 finalizeTokenOk = readToken(comma);
497 if (comma.type_ == tokenObjectEnd)
498 return true;
499 }
500 return addErrorAndRecover("Missing '}' or object member name", tokenName,
501 tokenObjectEnd);
502 }
503
readArray(Token & token)504 bool Reader::readArray(Token& token) {
505 Value init(arrayValue);
506 currentValue().swapPayload(init);
507 currentValue().setOffsetStart(token.start_ - begin_);
508 skipSpaces();
509 if (current_ != end_ && *current_ == ']') // empty array
510 {
511 Token endArray;
512 readToken(endArray);
513 return true;
514 }
515 int index = 0;
516 for (;;) {
517 Value& value = currentValue()[index++];
518 nodes_.push(&value);
519 bool ok = readValue();
520 nodes_.pop();
521 if (!ok) // error already set
522 return recoverFromError(tokenArrayEnd);
523
524 Token currentToken;
525 // Accept Comment after last item in the array.
526 ok = readToken(currentToken);
527 while (currentToken.type_ == tokenComment && ok) {
528 ok = readToken(currentToken);
529 }
530 bool badTokenType = (currentToken.type_ != tokenArraySeparator &&
531 currentToken.type_ != tokenArrayEnd);
532 if (!ok || badTokenType) {
533 return addErrorAndRecover("Missing ',' or ']' in array declaration",
534 currentToken, tokenArrayEnd);
535 }
536 if (currentToken.type_ == tokenArrayEnd)
537 break;
538 }
539 return true;
540 }
541
decodeNumber(Token & token)542 bool Reader::decodeNumber(Token& token) {
543 Value decoded;
544 if (!decodeNumber(token, decoded))
545 return false;
546 currentValue().swapPayload(decoded);
547 currentValue().setOffsetStart(token.start_ - begin_);
548 currentValue().setOffsetLimit(token.end_ - begin_);
549 return true;
550 }
551
decodeNumber(Token & token,Value & decoded)552 bool Reader::decodeNumber(Token& token, Value& decoded) {
553 // Attempts to parse the number as an integer. If the number is
554 // larger than the maximum supported value of an integer then
555 // we decode the number as a double.
556 Location current = token.start_;
557 bool isNegative = *current == '-';
558 if (isNegative)
559 ++current;
560 // TODO: Help the compiler do the div and mod at compile time or get rid of
561 // them.
562 Value::LargestUInt maxIntegerValue =
563 isNegative ? Value::LargestUInt(Value::maxLargestInt) + 1
564 : Value::maxLargestUInt;
565 Value::LargestUInt threshold = maxIntegerValue / 10;
566 Value::LargestUInt value = 0;
567 while (current < token.end_) {
568 Char c = *current++;
569 if (c < '0' || c > '9')
570 return decodeDouble(token, decoded);
571 Value::UInt digit(static_cast<Value::UInt>(c - '0'));
572 if (value >= threshold) {
573 // We've hit or exceeded the max value divided by 10 (rounded down). If
574 // a) we've only just touched the limit, b) this is the last digit, and
575 // c) it's small enough to fit in that rounding delta, we're okay.
576 // Otherwise treat this number as a double to avoid overflow.
577 if (value > threshold || current != token.end_ ||
578 digit > maxIntegerValue % 10) {
579 return decodeDouble(token, decoded);
580 }
581 }
582 value = value * 10 + digit;
583 }
584 if (isNegative && value == maxIntegerValue)
585 decoded = Value::minLargestInt;
586 else if (isNegative)
587 decoded = -Value::LargestInt(value);
588 else if (value <= Value::LargestUInt(Value::maxInt))
589 decoded = Value::LargestInt(value);
590 else
591 decoded = value;
592 return true;
593 }
594
decodeDouble(Token & token)595 bool Reader::decodeDouble(Token& token) {
596 Value decoded;
597 if (!decodeDouble(token, decoded))
598 return false;
599 currentValue().swapPayload(decoded);
600 currentValue().setOffsetStart(token.start_ - begin_);
601 currentValue().setOffsetLimit(token.end_ - begin_);
602 return true;
603 }
604
decodeDouble(Token & token,Value & decoded)605 bool Reader::decodeDouble(Token& token, Value& decoded) {
606 double value = 0;
607 String buffer(token.start_, token.end_);
608 IStringStream is(buffer);
609 if (!(is >> value))
610 return addError(
611 "'" + String(token.start_, token.end_) + "' is not a number.", token);
612 decoded = value;
613 return true;
614 }
615
decodeString(Token & token)616 bool Reader::decodeString(Token& token) {
617 String decoded_string;
618 if (!decodeString(token, decoded_string))
619 return false;
620 Value decoded(decoded_string);
621 currentValue().swapPayload(decoded);
622 currentValue().setOffsetStart(token.start_ - begin_);
623 currentValue().setOffsetLimit(token.end_ - begin_);
624 return true;
625 }
626
decodeString(Token & token,String & decoded)627 bool Reader::decodeString(Token& token, String& decoded) {
628 decoded.reserve(static_cast<size_t>(token.end_ - token.start_ - 2));
629 Location current = token.start_ + 1; // skip '"'
630 Location end = token.end_ - 1; // do not include '"'
631 while (current != end) {
632 Char c = *current++;
633 if (c == '"')
634 break;
635 if (c == '\\') {
636 if (current == end)
637 return addError("Empty escape sequence in string", token, current);
638 Char escape = *current++;
639 switch (escape) {
640 case '"':
641 decoded += '"';
642 break;
643 case '/':
644 decoded += '/';
645 break;
646 case '\\':
647 decoded += '\\';
648 break;
649 case 'b':
650 decoded += '\b';
651 break;
652 case 'f':
653 decoded += '\f';
654 break;
655 case 'n':
656 decoded += '\n';
657 break;
658 case 'r':
659 decoded += '\r';
660 break;
661 case 't':
662 decoded += '\t';
663 break;
664 case 'u': {
665 unsigned int unicode;
666 if (!decodeUnicodeCodePoint(token, current, end, unicode))
667 return false;
668 decoded += codePointToUTF8(unicode);
669 } break;
670 default:
671 return addError("Bad escape sequence in string", token, current);
672 }
673 } else {
674 decoded += c;
675 }
676 }
677 return true;
678 }
679
decodeUnicodeCodePoint(Token & token,Location & current,Location end,unsigned int & unicode)680 bool Reader::decodeUnicodeCodePoint(Token& token, Location& current,
681 Location end, unsigned int& unicode) {
682
683 if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
684 return false;
685 if (unicode >= 0xD800 && unicode <= 0xDBFF) {
686 // surrogate pairs
687 if (end - current < 6)
688 return addError(
689 "additional six characters expected to parse unicode surrogate pair.",
690 token, current);
691 if (*(current++) == '\\' && *(current++) == 'u') {
692 unsigned int surrogatePair;
693 if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
694 unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
695 } else
696 return false;
697 } else
698 return addError("expecting another \\u token to begin the second half of "
699 "a unicode surrogate pair",
700 token, current);
701 }
702 return true;
703 }
704
decodeUnicodeEscapeSequence(Token & token,Location & current,Location end,unsigned int & ret_unicode)705 bool Reader::decodeUnicodeEscapeSequence(Token& token, Location& current,
706 Location end,
707 unsigned int& ret_unicode) {
708 if (end - current < 4)
709 return addError(
710 "Bad unicode escape sequence in string: four digits expected.", token,
711 current);
712 int unicode = 0;
713 for (int index = 0; index < 4; ++index) {
714 Char c = *current++;
715 unicode *= 16;
716 if (c >= '0' && c <= '9')
717 unicode += c - '0';
718 else if (c >= 'a' && c <= 'f')
719 unicode += c - 'a' + 10;
720 else if (c >= 'A' && c <= 'F')
721 unicode += c - 'A' + 10;
722 else
723 return addError(
724 "Bad unicode escape sequence in string: hexadecimal digit expected.",
725 token, current);
726 }
727 ret_unicode = static_cast<unsigned int>(unicode);
728 return true;
729 }
730
addError(const String & message,Token & token,Location extra)731 bool Reader::addError(const String& message, Token& token, Location extra) {
732 ErrorInfo info;
733 info.token_ = token;
734 info.message_ = message;
735 info.extra_ = extra;
736 errors_.push_back(info);
737 return false;
738 }
739
recoverFromError(TokenType skipUntilToken)740 bool Reader::recoverFromError(TokenType skipUntilToken) {
741 size_t const errorCount = errors_.size();
742 Token skip;
743 for (;;) {
744 if (!readToken(skip))
745 errors_.resize(errorCount); // discard errors caused by recovery
746 if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
747 break;
748 }
749 errors_.resize(errorCount);
750 return false;
751 }
752
addErrorAndRecover(const String & message,Token & token,TokenType skipUntilToken)753 bool Reader::addErrorAndRecover(const String& message, Token& token,
754 TokenType skipUntilToken) {
755 addError(message, token);
756 return recoverFromError(skipUntilToken);
757 }
758
currentValue()759 Value& Reader::currentValue() { return *(nodes_.top()); }
760
getNextChar()761 Reader::Char Reader::getNextChar() {
762 if (current_ == end_)
763 return 0;
764 return *current_++;
765 }
766
getLocationLineAndColumn(Location location,int & line,int & column) const767 void Reader::getLocationLineAndColumn(Location location, int& line,
768 int& column) const {
769 Location current = begin_;
770 Location lastLineStart = current;
771 line = 0;
772 while (current < location && current != end_) {
773 Char c = *current++;
774 if (c == '\r') {
775 // Add boundary check to avoid cross the border
776 if (current == end_) {
777 break;
778 }
779 if (*current == '\n')
780 ++current;
781 lastLineStart = current;
782 ++line;
783 } else if (c == '\n') {
784 lastLineStart = current;
785 ++line;
786 }
787 }
788 // column & line start at 1
789 column = int(location - lastLineStart) + 1;
790 ++line;
791 }
792
getLocationLineAndColumn(Location location) const793 String Reader::getLocationLineAndColumn(Location location) const {
794 int line, column;
795 getLocationLineAndColumn(location, line, column);
796 char buffer[18 + 16 + 16 + 1];
797 jsoncpp_snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
798 return buffer;
799 }
800
801 // Deprecated. Preserved for backward compatibility
getFormatedErrorMessages() const802 String Reader::getFormatedErrorMessages() const {
803 return getFormattedErrorMessages();
804 }
805
getFormattedErrorMessages() const806 String Reader::getFormattedErrorMessages() const {
807 String formattedMessage;
808 for (Errors::const_iterator itError = errors_.begin();
809 itError != errors_.end(); ++itError) {
810 const ErrorInfo& error = *itError;
811 formattedMessage +=
812 "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
813 formattedMessage += " " + error.message_ + "\n";
814 if (error.extra_)
815 formattedMessage +=
816 "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
817 }
818 return formattedMessage;
819 }
820
getStructuredErrors() const821 std::vector<Reader::StructuredError> Reader::getStructuredErrors() const {
822 std::vector<Reader::StructuredError> allErrors;
823 for (Errors::const_iterator itError = errors_.begin();
824 itError != errors_.end(); ++itError) {
825 const ErrorInfo& error = *itError;
826 Reader::StructuredError structured;
827 structured.offset_start = error.token_.start_ - begin_;
828 structured.offset_limit = error.token_.end_ - begin_;
829 structured.message = error.message_;
830 allErrors.push_back(structured);
831 }
832 return allErrors;
833 }
834
pushError(const Value & value,const String & message)835 bool Reader::pushError(const Value& value, const String& message) {
836 ptrdiff_t const length = end_ - begin_;
837 if (value.getOffsetStart() > length || value.getOffsetLimit() > length)
838 return false;
839 Token token;
840 token.type_ = tokenError;
841 token.start_ = begin_ + value.getOffsetStart();
842 token.end_ = begin_ + value.getOffsetLimit();
843 ErrorInfo info;
844 info.token_ = token;
845 info.message_ = message;
846 info.extra_ = JSONCPP_NULL;
847 errors_.push_back(info);
848 return true;
849 }
850
pushError(const Value & value,const String & message,const Value & extra)851 bool Reader::pushError(const Value& value, const String& message,
852 const Value& extra) {
853 ptrdiff_t const length = end_ - begin_;
854 if (value.getOffsetStart() > length || value.getOffsetLimit() > length ||
855 extra.getOffsetLimit() > length)
856 return false;
857 Token token;
858 token.type_ = tokenError;
859 token.start_ = begin_ + value.getOffsetStart();
860 token.end_ = begin_ + value.getOffsetLimit();
861 ErrorInfo info;
862 info.token_ = token;
863 info.message_ = message;
864 info.extra_ = begin_ + extra.getOffsetStart();
865 errors_.push_back(info);
866 return true;
867 }
868
good() const869 bool Reader::good() const { return errors_.empty(); }
870
// Parser options consumed by OurReader.
// Originally copied from the Features class (now deprecated), used internally
// for features implementation.
class OurFeatures {
public:
  bool allowComments_; // when false, OurReader::parse() never collects comments
  bool allowTrailingCommas_;
  bool strictRoot_; // when true, the root must be an array or an object
  bool allowDroppedNullPlaceholders_;
  bool allowNumericKeys_;
  bool allowSingleQuotes_;
  bool failIfExtra_; // when true, a token after the root value is an error
  bool rejectDupKeys_;
  bool allowSpecialFloats_;
  bool skipBom_;      // handed to OurReader::skipBom() before the first value
  size_t stackLimit_; // readValue() throws once the node stack exceeds this

  // Returns a value-initialized feature set.
  static OurFeatures all();
}; // OurFeatures
888
all()889 OurFeatures OurFeatures::all() { return OurFeatures(); }
890
// Implementation of class OurReader
// ////////////////////////////////
893
894 // Originally copied from the Reader class (now deprecated), used internally
895 // for implementing JSON reading.
896 class OurReader {
897 public:
898 typedef char Char;
899 typedef const Char* Location;
900 struct StructuredError {
901 ptrdiff_t offset_start;
902 ptrdiff_t offset_limit;
903 String message;
904 };
905
906 JSONCPP_OP_EXPLICIT OurReader(OurFeatures const& features);
907 bool parse(const char* beginDoc, const char* endDoc, Value& root,
908 bool collectComments = true);
909 String getFormattedErrorMessages() const;
910 std::vector<StructuredError> getStructuredErrors() const;
911
912 private:
913 OurReader(OurReader const&); // no impl
914 void operator=(OurReader const&); // no impl
915
916 enum TokenType {
917 tokenEndOfStream = 0,
918 tokenObjectBegin,
919 tokenObjectEnd,
920 tokenArrayBegin,
921 tokenArrayEnd,
922 tokenString,
923 tokenNumber,
924 tokenTrue,
925 tokenFalse,
926 tokenNull,
927 tokenNaN,
928 tokenPosInf,
929 tokenNegInf,
930 tokenArraySeparator,
931 tokenMemberSeparator,
932 tokenComment,
933 tokenError
934 };
935
936 class Token {
937 public:
938 TokenType type_;
939 Location start_;
940 Location end_;
941 };
942
943 class ErrorInfo {
944 public:
945 Token token_;
946 String message_;
947 Location extra_;
948 };
949
950 typedef std::deque<ErrorInfo> Errors;
951
952 bool readToken(Token& token);
953 void skipSpaces();
954 void skipBom(bool skipBom);
955 bool match(const Char* pattern, int patternLength);
956 bool readComment();
957 bool readCStyleComment(bool* containsNewLineResult);
958 bool readCppStyleComment();
959 bool readString();
960 bool readStringSingleQuote();
961 bool readNumber(bool checkInf);
962 bool readValue();
963 bool readObject(Token& token);
964 bool readArray(Token& token);
965 bool decodeNumber(Token& token);
966 bool decodeNumber(Token& token, Value& decoded);
967 bool decodeString(Token& token);
968 bool decodeString(Token& token, String& decoded);
969 bool decodeDouble(Token& token);
970 bool decodeDouble(Token& token, Value& decoded);
971 bool decodeUnicodeCodePoint(Token& token, Location& current, Location end,
972 unsigned int& unicode);
973 bool decodeUnicodeEscapeSequence(Token& token, Location& current,
974 Location end, unsigned int& unicode);
975 bool addError(const String& message, Token& token,
976 Location extra = JSONCPP_NULL);
977 bool recoverFromError(TokenType skipUntilToken);
978 bool addErrorAndRecover(const String& message, Token& token,
979 TokenType skipUntilToken);
980 void skipUntilSpace();
981 Value& currentValue();
982 Char getNextChar();
983 void getLocationLineAndColumn(Location location, int& line,
984 int& column) const;
985 String getLocationLineAndColumn(Location location) const;
986 void addComment(Location begin, Location end, CommentPlacement placement);
987 void skipCommentTokens(Token& token);
988
989 static String normalizeEOL(Location begin, Location end);
990 static bool containsNewLine(Location begin, Location end);
991
992 typedef std::stack<Value*> Nodes;
993
994 Nodes nodes_;
995 Errors errors_;
996 String document_;
997 Location begin_;
998 Location end_;
999 Location current_;
1000 Location lastValueEnd_;
1001 Value* lastValue_;
1002 bool lastValueHasAComment_;
1003 String commentsBefore_;
1004
1005 OurFeatures const features_;
1006 bool collectComments_;
1007 }; // OurReader
1008
// Complete copy of the Reader implementation, adapted for OurReader.
1010
containsNewLine(OurReader::Location begin,OurReader::Location end)1011 bool OurReader::containsNewLine(OurReader::Location begin,
1012 OurReader::Location end) {
1013 for (; begin < end; ++begin)
1014 if (*begin == '\n' || *begin == '\r')
1015 return true;
1016 return false;
1017 }
1018
OurReader(OurFeatures const & features)1019 OurReader::OurReader(OurFeatures const& features)
1020 : errors_(), document_(), begin_(JSONCPP_NULL), end_(JSONCPP_NULL),
1021 current_(JSONCPP_NULL), lastValueEnd_(JSONCPP_NULL),
1022 lastValue_(JSONCPP_NULL), lastValueHasAComment_(false), commentsBefore_(),
1023 features_(features), collectComments_(false) {}
1024
parse(const char * beginDoc,const char * endDoc,Value & root,bool collectComments)1025 bool OurReader::parse(const char* beginDoc, const char* endDoc, Value& root,
1026 bool collectComments) {
1027 if (!features_.allowComments_) {
1028 collectComments = false;
1029 }
1030
1031 begin_ = beginDoc;
1032 end_ = endDoc;
1033 collectComments_ = collectComments;
1034 current_ = begin_;
1035 lastValueEnd_ = JSONCPP_NULL;
1036 lastValue_ = JSONCPP_NULL;
1037 commentsBefore_.clear();
1038 errors_.clear();
1039 while (!nodes_.empty())
1040 nodes_.pop();
1041 nodes_.push(&root);
1042
1043 // skip byte order mark if it exists at the beginning of the UTF-8 text.
1044 skipBom(features_.skipBom_);
1045 bool successful = readValue();
1046 nodes_.pop();
1047 Token token;
1048 skipCommentTokens(token);
1049 if (features_.failIfExtra_ && (token.type_ != tokenEndOfStream)) {
1050 addError("Extra non-whitespace after JSON value.", token);
1051 return false;
1052 }
1053 if (collectComments_ && !commentsBefore_.empty())
1054 root.setComment(commentsBefore_, commentAfter);
1055 if (features_.strictRoot_) {
1056 if (!root.isArray() && !root.isObject()) {
1057 // Set error location to start of doc, ideally should be first token found
1058 // in doc
1059 token.type_ = tokenError;
1060 token.start_ = beginDoc;
1061 token.end_ = endDoc;
1062 addError(
1063 "A valid JSON document must be either an array or an object value.",
1064 token);
1065 return false;
1066 }
1067 }
1068 return successful;
1069 }
1070
readValue()1071 bool OurReader::readValue() {
1072 // To preserve the old behaviour we cast size_t to int.
1073 if (nodes_.size() > features_.stackLimit_)
1074 throwRuntimeError("Exceeded stackLimit in readValue().");
1075 Token token;
1076 skipCommentTokens(token);
1077 bool successful = true;
1078
1079 if (collectComments_ && !commentsBefore_.empty()) {
1080 currentValue().setComment(commentsBefore_, commentBefore);
1081 commentsBefore_.clear();
1082 }
1083
1084 switch (token.type_) {
1085 case tokenObjectBegin:
1086 successful = readObject(token);
1087 currentValue().setOffsetLimit(current_ - begin_);
1088 break;
1089 case tokenArrayBegin:
1090 successful = readArray(token);
1091 currentValue().setOffsetLimit(current_ - begin_);
1092 break;
1093 case tokenNumber:
1094 successful = decodeNumber(token);
1095 break;
1096 case tokenString:
1097 successful = decodeString(token);
1098 break;
1099 case tokenTrue: {
1100 Value v(true);
1101 currentValue().swapPayload(v);
1102 currentValue().setOffsetStart(token.start_ - begin_);
1103 currentValue().setOffsetLimit(token.end_ - begin_);
1104 } break;
1105 case tokenFalse: {
1106 Value v(false);
1107 currentValue().swapPayload(v);
1108 currentValue().setOffsetStart(token.start_ - begin_);
1109 currentValue().setOffsetLimit(token.end_ - begin_);
1110 } break;
1111 case tokenNull: {
1112 Value v;
1113 currentValue().swapPayload(v);
1114 currentValue().setOffsetStart(token.start_ - begin_);
1115 currentValue().setOffsetLimit(token.end_ - begin_);
1116 } break;
1117 case tokenNaN: {
1118 Value v(std::numeric_limits<double>::quiet_NaN());
1119 currentValue().swapPayload(v);
1120 currentValue().setOffsetStart(token.start_ - begin_);
1121 currentValue().setOffsetLimit(token.end_ - begin_);
1122 } break;
1123 case tokenPosInf: {
1124 Value v(std::numeric_limits<double>::infinity());
1125 currentValue().swapPayload(v);
1126 currentValue().setOffsetStart(token.start_ - begin_);
1127 currentValue().setOffsetLimit(token.end_ - begin_);
1128 } break;
1129 case tokenNegInf: {
1130 Value v(-std::numeric_limits<double>::infinity());
1131 currentValue().swapPayload(v);
1132 currentValue().setOffsetStart(token.start_ - begin_);
1133 currentValue().setOffsetLimit(token.end_ - begin_);
1134 } break;
1135 case tokenArraySeparator:
1136 case tokenObjectEnd:
1137 case tokenArrayEnd:
1138 if (features_.allowDroppedNullPlaceholders_) {
1139 // "Un-read" the current token and mark the current value as a null
1140 // token.
1141 current_--;
1142 Value v;
1143 currentValue().swapPayload(v);
1144 currentValue().setOffsetStart(current_ - begin_ - 1);
1145 currentValue().setOffsetLimit(current_ - begin_);
1146 break;
1147 } // else, fall through ...
1148 default:
1149 currentValue().setOffsetStart(token.start_ - begin_);
1150 currentValue().setOffsetLimit(token.end_ - begin_);
1151 return addError("Syntax error: value, object or array expected.", token);
1152 }
1153
1154 if (collectComments_) {
1155 lastValueEnd_ = current_;
1156 lastValueHasAComment_ = false;
1157 lastValue_ = ¤tValue();
1158 }
1159
1160 return successful;
1161 }
1162
skipCommentTokens(Token & token)1163 void OurReader::skipCommentTokens(Token& token) {
1164 if (features_.allowComments_) {
1165 do {
1166 readToken(token);
1167 } while (token.type_ == tokenComment);
1168 } else {
1169 readToken(token);
1170 }
1171 }
1172
readToken(Token & token)1173 bool OurReader::readToken(Token& token) {
1174 skipSpaces();
1175 token.start_ = current_;
1176 Char c = getNextChar();
1177 bool ok = true;
1178 switch (c) {
1179 case '{':
1180 token.type_ = tokenObjectBegin;
1181 break;
1182 case '}':
1183 token.type_ = tokenObjectEnd;
1184 break;
1185 case '[':
1186 token.type_ = tokenArrayBegin;
1187 break;
1188 case ']':
1189 token.type_ = tokenArrayEnd;
1190 break;
1191 case '"':
1192 token.type_ = tokenString;
1193 ok = readString();
1194 break;
1195 case '\'':
1196 if (features_.allowSingleQuotes_) {
1197 token.type_ = tokenString;
1198 ok = readStringSingleQuote();
1199 break;
1200 } // else fall through
1201 case '/':
1202 token.type_ = tokenComment;
1203 ok = readComment();
1204 break;
1205 case '0':
1206 case '1':
1207 case '2':
1208 case '3':
1209 case '4':
1210 case '5':
1211 case '6':
1212 case '7':
1213 case '8':
1214 case '9':
1215 token.type_ = tokenNumber;
1216 readNumber(false);
1217 break;
1218 case '-':
1219 if (readNumber(true)) {
1220 token.type_ = tokenNumber;
1221 } else {
1222 token.type_ = tokenNegInf;
1223 ok = features_.allowSpecialFloats_ && match("nfinity", 7);
1224 }
1225 break;
1226 case '+':
1227 if (readNumber(true)) {
1228 token.type_ = tokenNumber;
1229 } else {
1230 token.type_ = tokenPosInf;
1231 ok = features_.allowSpecialFloats_ && match("nfinity", 7);
1232 }
1233 break;
1234 case 't':
1235 token.type_ = tokenTrue;
1236 ok = match("rue", 3);
1237 break;
1238 case 'f':
1239 token.type_ = tokenFalse;
1240 ok = match("alse", 4);
1241 break;
1242 case 'n':
1243 token.type_ = tokenNull;
1244 ok = match("ull", 3);
1245 break;
1246 case 'N':
1247 if (features_.allowSpecialFloats_) {
1248 token.type_ = tokenNaN;
1249 ok = match("aN", 2);
1250 } else {
1251 ok = false;
1252 }
1253 break;
1254 case 'I':
1255 if (features_.allowSpecialFloats_) {
1256 token.type_ = tokenPosInf;
1257 ok = match("nfinity", 7);
1258 } else {
1259 ok = false;
1260 }
1261 break;
1262 case ',':
1263 token.type_ = tokenArraySeparator;
1264 break;
1265 case ':':
1266 token.type_ = tokenMemberSeparator;
1267 break;
1268 case 0:
1269 token.type_ = tokenEndOfStream;
1270 break;
1271 default:
1272 ok = false;
1273 break;
1274 }
1275 if (!ok)
1276 token.type_ = tokenError;
1277 token.end_ = current_;
1278 return ok;
1279 }
1280
skipSpaces()1281 void OurReader::skipSpaces() {
1282 while (current_ != end_) {
1283 Char c = *current_;
1284 if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
1285 ++current_;
1286 else
1287 break;
1288 }
1289 }
1290
skipBom(bool skipBom)1291 void OurReader::skipBom(bool skipBom) {
1292 // The default behavior is to skip BOM.
1293 if (skipBom) {
1294 if ((end_ - begin_) >= 3 && strncmp(begin_, "\xEF\xBB\xBF", 3) == 0) {
1295 begin_ += 3;
1296 current_ = begin_;
1297 }
1298 }
1299 }
1300
match(const Char * pattern,int patternLength)1301 bool OurReader::match(const Char* pattern, int patternLength) {
1302 if (end_ - current_ < patternLength)
1303 return false;
1304 int index = patternLength;
1305 while (index--)
1306 if (current_[index] != pattern[index])
1307 return false;
1308 current_ += patternLength;
1309 return true;
1310 }
1311
readComment()1312 bool OurReader::readComment() {
1313 const Location commentBegin = current_ - 1;
1314 const Char c = getNextChar();
1315 bool successful = false;
1316 bool cStyleWithEmbeddedNewline = false;
1317
1318 const bool isCStyleComment = (c == '*');
1319 const bool isCppStyleComment = (c == '/');
1320 if (isCStyleComment) {
1321 successful = readCStyleComment(&cStyleWithEmbeddedNewline);
1322 } else if (isCppStyleComment) {
1323 successful = readCppStyleComment();
1324 }
1325
1326 if (!successful)
1327 return false;
1328
1329 if (collectComments_) {
1330 CommentPlacement placement = commentBefore;
1331
1332 if (!lastValueHasAComment_) {
1333 if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
1334 if (isCppStyleComment || !cStyleWithEmbeddedNewline) {
1335 placement = commentAfterOnSameLine;
1336 lastValueHasAComment_ = true;
1337 }
1338 }
1339 }
1340
1341 addComment(commentBegin, current_, placement);
1342 }
1343 return true;
1344 }
1345
normalizeEOL(OurReader::Location begin,OurReader::Location end)1346 String OurReader::normalizeEOL(OurReader::Location begin,
1347 OurReader::Location end) {
1348 String normalized;
1349 normalized.reserve(static_cast<size_t>(end - begin));
1350 OurReader::Location current = begin;
1351 while (current != end) {
1352 char c = *current++;
1353 if (c == '\r') {
1354 if (current != end && *current == '\n')
1355 // convert dos EOL
1356 ++current;
1357 // convert Mac EOL
1358 normalized += '\n';
1359 } else {
1360 normalized += c;
1361 }
1362 }
1363 return normalized;
1364 }
1365
addComment(Location begin,Location end,CommentPlacement placement)1366 void OurReader::addComment(Location begin, Location end,
1367 CommentPlacement placement) {
1368 assert(collectComments_);
1369 const String& normalized = normalizeEOL(begin, end);
1370 if (placement == commentAfterOnSameLine) {
1371 assert(lastValue_ != JSONCPP_NULL);
1372 lastValue_->setComment(normalized, placement);
1373 } else {
1374 commentsBefore_ += normalized;
1375 }
1376 }
1377
readCStyleComment(bool * containsNewLineResult)1378 bool OurReader::readCStyleComment(bool* containsNewLineResult) {
1379 *containsNewLineResult = false;
1380
1381 while ((current_ + 1) < end_) {
1382 Char c = getNextChar();
1383 if (c == '*' && *current_ == '/')
1384 break;
1385 if (c == '\n')
1386 *containsNewLineResult = true;
1387 }
1388
1389 return getNextChar() == '/';
1390 }
1391
readCppStyleComment()1392 bool OurReader::readCppStyleComment() {
1393 while (current_ != end_) {
1394 Char c = getNextChar();
1395 if (c == '\n')
1396 break;
1397 if (c == '\r') {
1398 // Consume DOS EOL. It will be normalized in addComment.
1399 if (current_ != end_ && *current_ == '\n')
1400 getNextChar();
1401 // Break on Moc OS 9 EOL.
1402 break;
1403 }
1404 }
1405 return true;
1406 }
1407
readNumber(bool checkInf)1408 bool OurReader::readNumber(bool checkInf) {
1409 Location p = current_;
1410 if (checkInf && p != end_ && *p == 'I') {
1411 current_ = ++p;
1412 return false;
1413 }
1414 char c = '0'; // stopgap for already consumed character
1415 // integral part
1416 while (c >= '0' && c <= '9')
1417 c = (current_ = p) < end_ ? *p++ : '\0';
1418 // fractional part
1419 if (c == '.') {
1420 c = (current_ = p) < end_ ? *p++ : '\0';
1421 while (c >= '0' && c <= '9')
1422 c = (current_ = p) < end_ ? *p++ : '\0';
1423 }
1424 // exponential part
1425 if (c == 'e' || c == 'E') {
1426 c = (current_ = p) < end_ ? *p++ : '\0';
1427 if (c == '+' || c == '-')
1428 c = (current_ = p) < end_ ? *p++ : '\0';
1429 while (c >= '0' && c <= '9')
1430 c = (current_ = p) < end_ ? *p++ : '\0';
1431 }
1432 return true;
1433 }
readString()1434 bool OurReader::readString() {
1435 Char c = 0;
1436 while (current_ != end_) {
1437 c = getNextChar();
1438 if (c == '\\')
1439 getNextChar();
1440 else if (c == '"')
1441 break;
1442 }
1443 return c == '"';
1444 }
1445
readStringSingleQuote()1446 bool OurReader::readStringSingleQuote() {
1447 Char c = 0;
1448 while (current_ != end_) {
1449 c = getNextChar();
1450 if (c == '\\')
1451 getNextChar();
1452 else if (c == '\'')
1453 break;
1454 }
1455 return c == '\'';
1456 }
1457
readObject(Token & token)1458 bool OurReader::readObject(Token& token) {
1459 Token tokenName;
1460 String name;
1461 Value init(objectValue);
1462 currentValue().swapPayload(init);
1463 currentValue().setOffsetStart(token.start_ - begin_);
1464 while (readToken(tokenName)) {
1465 bool initialTokenOk = true;
1466 while (tokenName.type_ == tokenComment && initialTokenOk)
1467 initialTokenOk = readToken(tokenName);
1468 if (!initialTokenOk)
1469 break;
1470 if (tokenName.type_ == tokenObjectEnd &&
1471 (name.empty() ||
1472 features_.allowTrailingCommas_)) // empty object or trailing comma
1473 return true;
1474 name.clear();
1475 if (tokenName.type_ == tokenString) {
1476 if (!decodeString(tokenName, name))
1477 return recoverFromError(tokenObjectEnd);
1478 } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
1479 Value numberName;
1480 if (!decodeNumber(tokenName, numberName))
1481 return recoverFromError(tokenObjectEnd);
1482 name = numberName.asString();
1483 } else {
1484 break;
1485 }
1486 if (name.length() >= (1U << 30))
1487 throwRuntimeError("keylength >= 2^30");
1488 if (features_.rejectDupKeys_ && currentValue().isMember(name)) {
1489 String msg = "Duplicate key: '" + name + "'";
1490 return addErrorAndRecover(msg, tokenName, tokenObjectEnd);
1491 }
1492
1493 Token colon;
1494 if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
1495 return addErrorAndRecover("Missing ':' after object member name", colon,
1496 tokenObjectEnd);
1497 }
1498 Value& value = currentValue()[name];
1499 nodes_.push(&value);
1500 bool ok = readValue();
1501 nodes_.pop();
1502 if (!ok) // error already set
1503 return recoverFromError(tokenObjectEnd);
1504
1505 Token comma;
1506 if (!readToken(comma) ||
1507 (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
1508 comma.type_ != tokenComment)) {
1509 return addErrorAndRecover("Missing ',' or '}' in object declaration",
1510 comma, tokenObjectEnd);
1511 }
1512 bool finalizeTokenOk = true;
1513 while (comma.type_ == tokenComment && finalizeTokenOk)
1514 finalizeTokenOk = readToken(comma);
1515 if (comma.type_ == tokenObjectEnd)
1516 return true;
1517 }
1518 return addErrorAndRecover("Missing '}' or object member name", tokenName,
1519 tokenObjectEnd);
1520 }
1521
readArray(Token & token)1522 bool OurReader::readArray(Token& token) {
1523 Value init(arrayValue);
1524 currentValue().swapPayload(init);
1525 currentValue().setOffsetStart(token.start_ - begin_);
1526 int index = 0;
1527 for (;;) {
1528 skipSpaces();
1529 if (current_ != end_ && *current_ == ']' &&
1530 (index == 0 ||
1531 (features_.allowTrailingCommas_ &&
1532 !features_.allowDroppedNullPlaceholders_))) // empty array or trailing
1533 // comma
1534 {
1535 Token endArray;
1536 readToken(endArray);
1537 return true;
1538 }
1539 Value& value = currentValue()[index++];
1540 nodes_.push(&value);
1541 bool ok = readValue();
1542 nodes_.pop();
1543 if (!ok) // error already set
1544 return recoverFromError(tokenArrayEnd);
1545
1546 Token currentToken;
1547 // Accept Comment after last item in the array.
1548 ok = readToken(currentToken);
1549 while (currentToken.type_ == tokenComment && ok) {
1550 ok = readToken(currentToken);
1551 }
1552 bool badTokenType = (currentToken.type_ != tokenArraySeparator &&
1553 currentToken.type_ != tokenArrayEnd);
1554 if (!ok || badTokenType) {
1555 return addErrorAndRecover("Missing ',' or ']' in array declaration",
1556 currentToken, tokenArrayEnd);
1557 }
1558 if (currentToken.type_ == tokenArrayEnd)
1559 break;
1560 }
1561 return true;
1562 }
1563
decodeNumber(Token & token)1564 bool OurReader::decodeNumber(Token& token) {
1565 Value decoded;
1566 if (!decodeNumber(token, decoded))
1567 return false;
1568 currentValue().swapPayload(decoded);
1569 currentValue().setOffsetStart(token.start_ - begin_);
1570 currentValue().setOffsetLimit(token.end_ - begin_);
1571 return true;
1572 }
1573
decodeNumber(Token & token,Value & decoded)1574 bool OurReader::decodeNumber(Token& token, Value& decoded) {
1575 // Attempts to parse the number as an integer. If the number is
1576 // larger than the maximum supported value of an integer then
1577 // we decode the number as a double.
1578 Location current = token.start_;
1579 const bool isNegative = *current == '-';
1580 if (isNegative) {
1581 ++current;
1582 }
1583
1584 // We assume we can represent the largest and smallest integer types as
1585 // unsigned integers with separate sign. This is only true if they can fit
1586 // into an unsigned integer.
1587 JSONCPP_STATIC_ASSERT(LargestUInt(Value::maxLargestInt) <=
1588 Value::maxLargestUInt,
1589 "Int must be smaller than Uint");
1590 // We need to convert minLargestInt into a positive number. The easiest way
1591 // to do this conversion is to assume our "threshold" value of minLargestInt
1592 // divided by 10 can fit in maxLargestInt when absolute valued. This should
1593 // be a safe assumption.
1594 JSONCPP_STATIC_ASSERT(
1595 Value::minLargestInt <= -Value::maxLargestInt,
1596 "The absolute value of minLargestInt must ve greater than or"
1597 "equal to maxLargestInt");
1598
1599 JSONCPP_STATIC_ASSERT(
1600 Value::minLargestInt / 10 >= -Value::maxLargestInt,
1601 "The absolute value of minLargestInt must be only 1 magnitude"
1602 "larger than maxLargestInt");
1603
1604 static JSONCPP_CONST Value::LargestUInt positive_threshold =
1605 Value::maxLargestUInt / 10;
1606 static JSONCPP_CONST Value::UInt positive_last_digit =
1607 Value::maxLargestUInt % 10;
1608
1609 // For the negative values, we have to be more careful. Since typically
1610 // -Value::minLargestInt will cause an overflow, we first divide by 10 and
1611 // then take the inverse. This assumes that minLargestInt is only a single
1612 // power of 10 different in magnitude, which we check above. For the last
1613 // digit, we take the modulus before negating for the same reason.
1614 static JSONCPP_CONST Value::LargestUInt negative_threshold =
1615 Value::LargestUInt(-(Value::minLargestInt / 10));
1616 static JSONCPP_CONST Value::UInt negative_last_digit =
1617 Value::UInt(-(Value::minLargestInt % 10));
1618
1619 const Value::LargestUInt threshold =
1620 isNegative ? negative_threshold : positive_threshold;
1621 const Value::UInt max_last_digit =
1622 isNegative ? negative_last_digit : positive_last_digit;
1623
1624 Value::LargestUInt value = 0;
1625 while (current < token.end_) {
1626 Char c = *current++;
1627 if (c < '0' || c > '9')
1628 return decodeDouble(token, decoded);
1629
1630 const Value::UInt digit(static_cast<Value::UInt>(c - '0'));
1631 if (value >= threshold) {
1632 // We've hit or exceeded the max value divided by 10 (rounded down). If
1633 // a) we've only just touched the limit, meaing value == threshold,
1634 // b) this is the last digit, or
1635 // c) it's small enough to fit in that rounding delta, we're okay.
1636 // Otherwise treat this number as a double to avoid overflow.
1637 if (value > threshold || current != token.end_ ||
1638 digit > max_last_digit) {
1639 return decodeDouble(token, decoded);
1640 }
1641 }
1642 value = value * 10 + digit;
1643 }
1644
1645 if (isNegative) {
1646 // We use the same magnitude assumption here, just in case.
1647 const Value::UInt last_digit = static_cast<Value::UInt>(value % 10);
1648 decoded = -Value::LargestInt(value / 10) * 10 - last_digit;
1649 } else if (value <= Value::LargestUInt(Value::maxLargestInt)) {
1650 decoded = Value::LargestInt(value);
1651 } else {
1652 decoded = value;
1653 }
1654
1655 return true;
1656 }
1657
decodeDouble(Token & token)1658 bool OurReader::decodeDouble(Token& token) {
1659 Value decoded;
1660 if (!decodeDouble(token, decoded))
1661 return false;
1662 currentValue().swapPayload(decoded);
1663 currentValue().setOffsetStart(token.start_ - begin_);
1664 currentValue().setOffsetLimit(token.end_ - begin_);
1665 return true;
1666 }
1667
decodeDouble(Token & token,Value & decoded)1668 bool OurReader::decodeDouble(Token& token, Value& decoded) {
1669 double value = 0;
1670 const String buffer(token.start_, token.end_);
1671 IStringStream is(buffer);
1672 if (!(is >> value)) {
1673 // the value could be lower than numeric_limits<double>::min(), in this situtation we should return the value with the gurantee
1674 // of conversion which has been performed and no occurances of range error.
1675 if ((value > 0 && value < std::numeric_limits<double>::min()) || (value < 0 && value > -std::numeric_limits<double>::min())) {
1676 decoded = value;
1677 return true;
1678 }
1679 return addError(
1680 "'" + String(token.start_, token.end_) + "' is not a number.", token);
1681 }
1682 decoded = value;
1683 return true;
1684 }
1685
decodeString(Token & token)1686 bool OurReader::decodeString(Token& token) {
1687 String decoded_string;
1688 if (!decodeString(token, decoded_string))
1689 return false;
1690 Value decoded(decoded_string);
1691 currentValue().swapPayload(decoded);
1692 currentValue().setOffsetStart(token.start_ - begin_);
1693 currentValue().setOffsetLimit(token.end_ - begin_);
1694 return true;
1695 }
1696
decodeString(Token & token,String & decoded)1697 bool OurReader::decodeString(Token& token, String& decoded) {
1698 decoded.reserve(static_cast<size_t>(token.end_ - token.start_ - 2));
1699 Location current = token.start_ + 1; // skip '"'
1700 Location end = token.end_ - 1; // do not include '"'
1701 while (current != end) {
1702 Char c = *current++;
1703 if (c == '"')
1704 break;
1705 if (c == '\\') {
1706 if (current == end)
1707 return addError("Empty escape sequence in string", token, current);
1708 Char escape = *current++;
1709 switch (escape) {
1710 case '"':
1711 decoded += '"';
1712 break;
1713 case '/':
1714 decoded += '/';
1715 break;
1716 case '\\':
1717 decoded += '\\';
1718 break;
1719 case 'b':
1720 decoded += '\b';
1721 break;
1722 case 'f':
1723 decoded += '\f';
1724 break;
1725 case 'n':
1726 decoded += '\n';
1727 break;
1728 case 'r':
1729 decoded += '\r';
1730 break;
1731 case 't':
1732 decoded += '\t';
1733 break;
1734 case 'u': {
1735 unsigned int unicode;
1736 if (!decodeUnicodeCodePoint(token, current, end, unicode))
1737 return false;
1738 decoded += codePointToUTF8(unicode);
1739 } break;
1740 default:
1741 return addError("Bad escape sequence in string", token, current);
1742 }
1743 } else {
1744 decoded += c;
1745 }
1746 }
1747 return true;
1748 }
1749
decodeUnicodeCodePoint(Token & token,Location & current,Location end,unsigned int & unicode)1750 bool OurReader::decodeUnicodeCodePoint(Token& token, Location& current,
1751 Location end, unsigned int& unicode) {
1752
1753 if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
1754 return false;
1755 if (unicode >= 0xD800 && unicode <= 0xDBFF) {
1756 // surrogate pairs
1757 if (end - current < 6)
1758 return addError(
1759 "additional six characters expected to parse unicode surrogate pair.",
1760 token, current);
1761 if (*(current++) == '\\' && *(current++) == 'u') {
1762 unsigned int surrogatePair;
1763 if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
1764 unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
1765 } else
1766 return false;
1767 } else
1768 return addError("expecting another \\u token to begin the second half of "
1769 "a unicode surrogate pair",
1770 token, current);
1771 }
1772 return true;
1773 }
1774
decodeUnicodeEscapeSequence(Token & token,Location & current,Location end,unsigned int & ret_unicode)1775 bool OurReader::decodeUnicodeEscapeSequence(Token& token, Location& current,
1776 Location end,
1777 unsigned int& ret_unicode) {
1778 if (end - current < 4)
1779 return addError(
1780 "Bad unicode escape sequence in string: four digits expected.", token,
1781 current);
1782 int unicode = 0;
1783 for (int index = 0; index < 4; ++index) {
1784 Char c = *current++;
1785 unicode *= 16;
1786 if (c >= '0' && c <= '9')
1787 unicode += c - '0';
1788 else if (c >= 'a' && c <= 'f')
1789 unicode += c - 'a' + 10;
1790 else if (c >= 'A' && c <= 'F')
1791 unicode += c - 'A' + 10;
1792 else
1793 return addError(
1794 "Bad unicode escape sequence in string: hexadecimal digit expected.",
1795 token, current);
1796 }
1797 ret_unicode = static_cast<unsigned int>(unicode);
1798 return true;
1799 }
1800
addError(const String & message,Token & token,Location extra)1801 bool OurReader::addError(const String& message, Token& token, Location extra) {
1802 ErrorInfo info;
1803 info.token_ = token;
1804 info.message_ = message;
1805 info.extra_ = extra;
1806 errors_.push_back(info);
1807 return false;
1808 }
1809
recoverFromError(TokenType skipUntilToken)1810 bool OurReader::recoverFromError(TokenType skipUntilToken) {
1811 size_t errorCount = errors_.size();
1812 Token skip;
1813 for (;;) {
1814 if (!readToken(skip))
1815 errors_.resize(errorCount); // discard errors caused by recovery
1816 if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
1817 break;
1818 }
1819 errors_.resize(errorCount);
1820 return false;
1821 }
1822
addErrorAndRecover(const String & message,Token & token,TokenType skipUntilToken)1823 bool OurReader::addErrorAndRecover(const String& message, Token& token,
1824 TokenType skipUntilToken) {
1825 addError(message, token);
1826 return recoverFromError(skipUntilToken);
1827 }
1828
currentValue()1829 Value& OurReader::currentValue() { return *(nodes_.top()); }
1830
getNextChar()1831 OurReader::Char OurReader::getNextChar() {
1832 if (current_ == end_)
1833 return 0;
1834 return *current_++;
1835 }
1836
getLocationLineAndColumn(Location location,int & line,int & column) const1837 void OurReader::getLocationLineAndColumn(Location location, int& line,
1838 int& column) const {
1839 Location current = begin_;
1840 Location lastLineStart = current;
1841 line = 0;
1842 while (current < location && current != end_) {
1843 Char c = *current++;
1844 if (c == '\r') {
1845 // Add boundary check to avoid cross the border
1846 if (current == end_) {
1847 break;
1848 }
1849 if (*current == '\n')
1850 ++current;
1851 lastLineStart = current;
1852 ++line;
1853 } else if (c == '\n') {
1854 lastLineStart = current;
1855 ++line;
1856 }
1857 }
1858 // column & line start at 1
1859 column = int(location - lastLineStart) + 1;
1860 ++line;
1861 }
1862
getLocationLineAndColumn(Location location) const1863 String OurReader::getLocationLineAndColumn(Location location) const {
1864 int line, column;
1865 getLocationLineAndColumn(location, line, column);
1866 char buffer[18 + 16 + 16 + 1];
1867 jsoncpp_snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
1868 return buffer;
1869 }
1870
getFormattedErrorMessages() const1871 String OurReader::getFormattedErrorMessages() const {
1872 String formattedMessage;
1873 for (Errors::const_iterator itError = errors_.begin();
1874 itError != errors_.end(); ++itError) {
1875 const ErrorInfo& error = *itError;
1876 formattedMessage +=
1877 "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
1878 formattedMessage += " " + error.message_ + "\n";
1879 if (error.extra_)
1880 formattedMessage +=
1881 "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
1882 }
1883 return formattedMessage;
1884 }
1885
getStructuredErrors() const1886 std::vector<OurReader::StructuredError> OurReader::getStructuredErrors() const {
1887 std::vector<OurReader::StructuredError> allErrors;
1888 for (Errors::const_iterator itError = errors_.begin();
1889 itError != errors_.end(); ++itError) {
1890 const ErrorInfo& error = *itError;
1891 OurReader::StructuredError structured;
1892 structured.offset_start = error.token_.start_ - begin_;
1893 structured.offset_limit = error.token_.end_ - begin_;
1894 structured.message = error.message_;
1895 allErrors.push_back(structured);
1896 }
1897 return allErrors;
1898 }
1899
1900 class OurCharReader : public CharReader {
1901 bool const collectComments_;
1902 OurReader reader_;
1903
1904 public:
OurCharReader(bool collectComments,OurFeatures const & features)1905 OurCharReader(bool collectComments, OurFeatures const& features)
1906 : collectComments_(collectComments), reader_(features) {}
parse(char const * beginDoc,char const * endDoc,Value * root,String * errs)1907 bool parse(char const* beginDoc, char const* endDoc, Value* root,
1908 String* errs) JSONCPP_OVERRIDE {
1909 bool ok = reader_.parse(beginDoc, endDoc, *root, collectComments_);
1910 if (errs) {
1911 *errs = reader_.getFormattedErrorMessages();
1912 }
1913 return ok;
1914 }
1915 };
1916
CharReaderBuilder()1917 CharReaderBuilder::CharReaderBuilder() { setDefaults(&settings_); }
~CharReaderBuilder()1918 CharReaderBuilder::~CharReaderBuilder() {}
newCharReader() const1919 CharReader* CharReaderBuilder::newCharReader() const {
1920 bool collectComments = settings_["collectComments"].asBool();
1921 OurFeatures features = OurFeatures::all();
1922 features.allowComments_ = settings_["allowComments"].asBool();
1923 features.allowTrailingCommas_ = settings_["allowTrailingCommas"].asBool();
1924 features.strictRoot_ = settings_["strictRoot"].asBool();
1925 features.allowDroppedNullPlaceholders_ =
1926 settings_["allowDroppedNullPlaceholders"].asBool();
1927 features.allowNumericKeys_ = settings_["allowNumericKeys"].asBool();
1928 features.allowSingleQuotes_ = settings_["allowSingleQuotes"].asBool();
1929
1930 // Stack limit is always a size_t, so we get this as an unsigned int
1931 // regardless of it we have 64-bit integer support enabled.
1932 features.stackLimit_ = static_cast<size_t>(settings_["stackLimit"].asUInt());
1933 features.failIfExtra_ = settings_["failIfExtra"].asBool();
1934 features.rejectDupKeys_ = settings_["rejectDupKeys"].asBool();
1935 features.allowSpecialFloats_ = settings_["allowSpecialFloats"].asBool();
1936 features.skipBom_ = settings_["skipBom"].asBool();
1937 return new OurCharReader(collectComments, features);
1938 }
getValidReaderKeys(std::set<String> * valid_keys)1939 static void getValidReaderKeys(std::set<String>* valid_keys) {
1940 valid_keys->clear();
1941 valid_keys->insert("collectComments");
1942 valid_keys->insert("allowComments");
1943 valid_keys->insert("allowTrailingCommas");
1944 valid_keys->insert("strictRoot");
1945 valid_keys->insert("allowDroppedNullPlaceholders");
1946 valid_keys->insert("allowNumericKeys");
1947 valid_keys->insert("allowSingleQuotes");
1948 valid_keys->insert("stackLimit");
1949 valid_keys->insert("failIfExtra");
1950 valid_keys->insert("rejectDupKeys");
1951 valid_keys->insert("allowSpecialFloats");
1952 valid_keys->insert("skipBom");
1953 }
validate(Json::Value * invalid) const1954 bool CharReaderBuilder::validate(Json::Value* invalid) const {
1955 Json::Value my_invalid;
1956 if (!invalid)
1957 invalid = &my_invalid; // so we do not need to test for NULL
1958 Json::Value& inv = *invalid;
1959 std::set<String> valid_keys;
1960 getValidReaderKeys(&valid_keys);
1961 Value::Members keys = settings_.getMemberNames();
1962 size_t n = keys.size();
1963 for (size_t i = 0; i < n; ++i) {
1964 String const& key = keys[i];
1965 if (valid_keys.find(key) == valid_keys.end()) {
1966 inv[key] = settings_[key];
1967 }
1968 }
1969 return inv.empty();
1970 }
operator [](const String & key)1971 Value& CharReaderBuilder::operator[](const String& key) {
1972 return settings_[key];
1973 }
1974 // static
// Writes the strict reader configuration into *settings: comments, trailing
// commas, dropped null placeholders, numeric keys, single quotes and special
// floats are disabled; a container root is required; trailing content and
// duplicate keys are rejected. "collectComments" is not touched.
//
// NOTE(review): the //! [...] lines look like Doxygen snippet markers, so
// the statements between them are presumably quoted verbatim in the
// generated documentation -- confirm before reformatting them.
void CharReaderBuilder::strictMode(Json::Value* settings) {
  //! [CharReaderBuilderStrictMode]
  (*settings)["allowComments"] = false;
  (*settings)["allowTrailingCommas"] = false;
  (*settings)["strictRoot"] = true;
  (*settings)["allowDroppedNullPlaceholders"] = false;
  (*settings)["allowNumericKeys"] = false;
  (*settings)["allowSingleQuotes"] = false;
  (*settings)["stackLimit"] = 1000;
  (*settings)["failIfExtra"] = true;
  (*settings)["rejectDupKeys"] = true;
  (*settings)["allowSpecialFloats"] = false;
  (*settings)["skipBom"] = true;
  //! [CharReaderBuilderStrictMode]
}
1990 // static
// Writes the default reader configuration into *settings: comments are
// allowed and collected, trailing commas are allowed, and the remaining
// relaxations and strictness checks are off. The stack limit is 1000 and a
// leading byte order mark is skipped.
//
// NOTE(review): the //! [...] lines look like Doxygen snippet markers, so
// the statements between them are presumably quoted verbatim in the
// generated documentation -- confirm before reformatting them.
void CharReaderBuilder::setDefaults(Json::Value* settings) {
  //! [CharReaderBuilderDefaults]
  (*settings)["collectComments"] = true;
  (*settings)["allowComments"] = true;
  (*settings)["allowTrailingCommas"] = true;
  (*settings)["strictRoot"] = false;
  (*settings)["allowDroppedNullPlaceholders"] = false;
  (*settings)["allowNumericKeys"] = false;
  (*settings)["allowSingleQuotes"] = false;
  (*settings)["stackLimit"] = 1000;
  (*settings)["failIfExtra"] = false;
  (*settings)["rejectDupKeys"] = false;
  (*settings)["allowSpecialFloats"] = false;
  (*settings)["skipBom"] = true;
  //! [CharReaderBuilderDefaults]
}
2007
2008 //////////////////////////////////
2009 // global functions
2010
parseFromStream(CharReader::Factory const & fact,IStream & sin,Value * root,String * errs)2011 bool parseFromStream(CharReader::Factory const& fact, IStream& sin, Value* root,
2012 String* errs) {
2013 OStringStream ssin;
2014 ssin << sin.rdbuf();
2015 String doc = ssin.str();
2016 char const* begin = doc.data();
2017 char const* end = begin + doc.size();
2018 // Note that we do not actually need a null-terminator.
2019 CharReaderPtr const reader(fact.newCharReader());
2020 bool ret = reader->parse(begin, end, root, errs);
2021 delete reader;
2022 return ret;
2023 }
2024
operator >>(IStream & sin,Value & root)2025 IStream& operator>>(IStream& sin, Value& root) {
2026 CharReaderBuilder b;
2027 String errs;
2028 bool ok = parseFromStream(b, sin, &root, &errs);
2029 if (!ok) {
2030 throwRuntimeError(errs);
2031 }
2032 return sin;
2033 }
2034
2035 } // namespace Json
2036