1 // Copyright 2007-2011 Baptiste Lepilleur and The JsonCpp Authors
2 // Copyright (C) 2016 InfoTeCS JSC. All rights reserved.
3 // Distributed under MIT license, or public domain if desired and
4 // recognized in your jurisdiction.
5 // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE
6
7 #if !defined(JSON_IS_AMALGAMATION)
8 #include "json_tool.h"
9 #include <json/assertions.h>
10 #include <json/reader.h>
11 #include <json/value.h>
12 #endif // if !defined(JSON_IS_AMALGAMATION)
#include <cassert>
#include <cstring>
#include <istream>
#include <iterator>
#include <limits>
#include <memory>
#include <set>
#include <sstream>
#include <utility>
21
22 #include <cstdio>
23 #if __cplusplus >= 201103L
24
25 #if !defined(sscanf)
26 #define sscanf std::sscanf
27 #endif
28
29 #endif //__cplusplus
30
31 #if defined(_MSC_VER)
32 #if !defined(_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES)
33 #define _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES 1
34 #endif //_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES
35 #endif //_MSC_VER
36
37 #if defined(_MSC_VER)
38 // Disable warning about strdup being deprecated.
39 #pragma warning(disable : 4996)
40 #endif
41
42 // Define JSONCPP_DEPRECATED_STACK_LIMIT as an appropriate integer at compile
43 // time to change the stack limit
44 #if !defined(JSONCPP_DEPRECATED_STACK_LIMIT)
45 #define JSONCPP_DEPRECATED_STACK_LIMIT 1000
46 #endif
47
48 static size_t const stackLimit_g =
49 JSONCPP_DEPRECATED_STACK_LIMIT; // see readValue()
50
51 namespace Json {
52
// Smart-pointer alias for CharReader: std::unique_ptr when __cplusplus reports
// C++11 or later (or _CPPLIB_VER >= 520), std::auto_ptr otherwise.
#if __cplusplus >= 201103L || (defined(_CPPLIB_VER) && _CPPLIB_VER >= 520)
typedef std::unique_ptr<CharReader> CharReaderPtr;
#else
typedef std::auto_ptr<CharReader> CharReaderPtr;
#endif
58
59 // Implementation of class Features
60 // ////////////////////////////////
61
// Default constructor; the compiler-generated definition is used.
Features::Features() = default;
63
all()64 Features Features::all() { return {}; }
65
strictMode()66 Features Features::strictMode() {
67 Features features;
68 features.allowComments_ = false;
69 features.strictRoot_ = true;
70 features.allowDroppedNullPlaceholders_ = false;
71 features.allowNumericKeys_ = false;
72 return features;
73 }
74
75 // Implementation of class Reader
76 // ////////////////////////////////
77
containsNewLine(Reader::Location begin,Reader::Location end)78 bool Reader::containsNewLine(Reader::Location begin, Reader::Location end) {
79 for (; begin < end; ++begin)
80 if (*begin == '\n' || *begin == '\r')
81 return true;
82 return false;
83 }
84
85 // Class Reader
86 // //////////////////////////////////////////////////////////////////
87
Reader()88 Reader::Reader()
89 : errors_(), document_(), commentsBefore_(), features_(Features::all()) {}
90
Reader(const Features & features)91 Reader::Reader(const Features& features)
92 : errors_(), document_(), begin_(), end_(), current_(), lastValueEnd_(),
93 lastValue_(), commentsBefore_(), features_(features), collectComments_() {
94 }
95
parse(const std::string & document,Value & root,bool collectComments)96 bool Reader::parse(const std::string& document,
97 Value& root,
98 bool collectComments) {
99 document_.assign(document.begin(), document.end());
100 const char* begin = document_.c_str();
101 const char* end = begin + document_.length();
102 return parse(begin, end, root, collectComments);
103 }
104
parse(std::istream & is,Value & root,bool collectComments)105 bool Reader::parse(std::istream& is, Value& root, bool collectComments) {
106 // std::istream_iterator<char> begin(is);
107 // std::istream_iterator<char> end;
108 // Those would allow streamed input from a file, if parse() were a
109 // template function.
110
111 // Since String is reference-counted, this at least does not
112 // create an extra copy.
113 String doc;
114 std::getline(is, doc, (char)EOF);
115 return parse(doc.data(), doc.data() + doc.size(), root, collectComments);
116 }
117
parse(const char * beginDoc,const char * endDoc,Value & root,bool collectComments)118 bool Reader::parse(const char* beginDoc,
119 const char* endDoc,
120 Value& root,
121 bool collectComments) {
122 if (!features_.allowComments_) {
123 collectComments = false;
124 }
125
126 begin_ = beginDoc;
127 end_ = endDoc;
128 collectComments_ = collectComments;
129 current_ = begin_;
130 lastValueEnd_ = nullptr;
131 lastValue_ = nullptr;
132 commentsBefore_.clear();
133 errors_.clear();
134 while (!nodes_.empty())
135 nodes_.pop();
136 nodes_.push(&root);
137
138 bool successful = readValue();
139 Token token;
140 skipCommentTokens(token);
141 if (collectComments_ && !commentsBefore_.empty())
142 root.setComment(commentsBefore_, commentAfter);
143 if (features_.strictRoot_) {
144 if (!root.isArray() && !root.isObject()) {
145 // Set error location to start of doc, ideally should be first token found
146 // in doc
147 token.type_ = tokenError;
148 token.start_ = beginDoc;
149 token.end_ = endDoc;
150 addError(
151 "A valid JSON document must be either an array or an object value.",
152 token);
153 return false;
154 }
155 }
156 return successful;
157 }
158
readValue()159 bool Reader::readValue() {
160 // readValue() may call itself only if it calls readObject() or ReadArray().
161 // These methods execute nodes_.push() just before and nodes_.pop)() just
162 // after calling readValue(). parse() executes one nodes_.push(), so > instead
163 // of >=.
164 if (nodes_.size() > stackLimit_g)
165 throwRuntimeError("Exceeded stackLimit in readValue().");
166
167 Token token;
168 skipCommentTokens(token);
169 bool successful = true;
170
171 if (collectComments_ && !commentsBefore_.empty()) {
172 currentValue().setComment(commentsBefore_, commentBefore);
173 commentsBefore_.clear();
174 }
175
176 switch (token.type_) {
177 case tokenObjectBegin:
178 successful = readObject(token);
179 currentValue().setOffsetLimit(current_ - begin_);
180 break;
181 case tokenArrayBegin:
182 successful = readArray(token);
183 currentValue().setOffsetLimit(current_ - begin_);
184 break;
185 case tokenNumber:
186 successful = decodeNumber(token);
187 break;
188 case tokenString:
189 successful = decodeString(token);
190 break;
191 case tokenTrue: {
192 Value v(true);
193 currentValue().swapPayload(v);
194 currentValue().setOffsetStart(token.start_ - begin_);
195 currentValue().setOffsetLimit(token.end_ - begin_);
196 } break;
197 case tokenFalse: {
198 Value v(false);
199 currentValue().swapPayload(v);
200 currentValue().setOffsetStart(token.start_ - begin_);
201 currentValue().setOffsetLimit(token.end_ - begin_);
202 } break;
203 case tokenNull: {
204 Value v;
205 currentValue().swapPayload(v);
206 currentValue().setOffsetStart(token.start_ - begin_);
207 currentValue().setOffsetLimit(token.end_ - begin_);
208 } break;
209 case tokenArraySeparator:
210 case tokenObjectEnd:
211 case tokenArrayEnd:
212 if (features_.allowDroppedNullPlaceholders_) {
213 // "Un-read" the current token and mark the current value as a null
214 // token.
215 current_--;
216 Value v;
217 currentValue().swapPayload(v);
218 currentValue().setOffsetStart(current_ - begin_ - 1);
219 currentValue().setOffsetLimit(current_ - begin_);
220 break;
221 } // Else, fall through...
222 default:
223 currentValue().setOffsetStart(token.start_ - begin_);
224 currentValue().setOffsetLimit(token.end_ - begin_);
225 return addError("Syntax error: value, object or array expected.", token);
226 }
227
228 if (collectComments_) {
229 lastValueEnd_ = current_;
230 lastValue_ = ¤tValue();
231 }
232
233 return successful;
234 }
235
skipCommentTokens(Token & token)236 void Reader::skipCommentTokens(Token& token) {
237 if (features_.allowComments_) {
238 do {
239 readToken(token);
240 } while (token.type_ == tokenComment);
241 } else {
242 readToken(token);
243 }
244 }
245
readToken(Token & token)246 bool Reader::readToken(Token& token) {
247 skipSpaces();
248 token.start_ = current_;
249 Char c = getNextChar();
250 bool ok = true;
251 switch (c) {
252 case '{':
253 token.type_ = tokenObjectBegin;
254 break;
255 case '}':
256 token.type_ = tokenObjectEnd;
257 break;
258 case '[':
259 token.type_ = tokenArrayBegin;
260 break;
261 case ']':
262 token.type_ = tokenArrayEnd;
263 break;
264 case '"':
265 token.type_ = tokenString;
266 ok = readString();
267 break;
268 case '/':
269 token.type_ = tokenComment;
270 ok = readComment();
271 break;
272 case '0':
273 case '1':
274 case '2':
275 case '3':
276 case '4':
277 case '5':
278 case '6':
279 case '7':
280 case '8':
281 case '9':
282 case '-':
283 token.type_ = tokenNumber;
284 readNumber();
285 break;
286 case 't':
287 token.type_ = tokenTrue;
288 ok = match("rue", 3);
289 break;
290 case 'f':
291 token.type_ = tokenFalse;
292 ok = match("alse", 4);
293 break;
294 case 'n':
295 token.type_ = tokenNull;
296 ok = match("ull", 3);
297 break;
298 case ',':
299 token.type_ = tokenArraySeparator;
300 break;
301 case ':':
302 token.type_ = tokenMemberSeparator;
303 break;
304 case 0:
305 token.type_ = tokenEndOfStream;
306 break;
307 default:
308 ok = false;
309 break;
310 }
311 if (!ok)
312 token.type_ = tokenError;
313 token.end_ = current_;
314 return true;
315 }
316
skipSpaces()317 void Reader::skipSpaces() {
318 while (current_ != end_) {
319 Char c = *current_;
320 if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
321 ++current_;
322 else
323 break;
324 }
325 }
326
match(Location pattern,int patternLength)327 bool Reader::match(Location pattern, int patternLength) {
328 if (end_ - current_ < patternLength)
329 return false;
330 int index = patternLength;
331 while (index--)
332 if (current_[index] != pattern[index])
333 return false;
334 current_ += patternLength;
335 return true;
336 }
337
readComment()338 bool Reader::readComment() {
339 Location commentBegin = current_ - 1;
340 Char c = getNextChar();
341 bool successful = false;
342 if (c == '*')
343 successful = readCStyleComment();
344 else if (c == '/')
345 successful = readCppStyleComment();
346 if (!successful)
347 return false;
348
349 if (collectComments_) {
350 CommentPlacement placement = commentBefore;
351 if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
352 if (c != '*' || !containsNewLine(commentBegin, current_))
353 placement = commentAfterOnSameLine;
354 }
355
356 addComment(commentBegin, current_, placement);
357 }
358 return true;
359 }
360
normalizeEOL(Reader::Location begin,Reader::Location end)361 String Reader::normalizeEOL(Reader::Location begin, Reader::Location end) {
362 String normalized;
363 normalized.reserve(static_cast<size_t>(end - begin));
364 Reader::Location current = begin;
365 while (current != end) {
366 char c = *current++;
367 if (c == '\r') {
368 if (current != end && *current == '\n')
369 // convert dos EOL
370 ++current;
371 // convert Mac EOL
372 normalized += '\n';
373 } else {
374 normalized += c;
375 }
376 }
377 return normalized;
378 }
379
addComment(Location begin,Location end,CommentPlacement placement)380 void Reader::addComment(Location begin,
381 Location end,
382 CommentPlacement placement) {
383 assert(collectComments_);
384 const String& normalized = normalizeEOL(begin, end);
385 if (placement == commentAfterOnSameLine) {
386 assert(lastValue_ != nullptr);
387 lastValue_->setComment(normalized, placement);
388 } else {
389 commentsBefore_ += normalized;
390 }
391 }
392
readCStyleComment()393 bool Reader::readCStyleComment() {
394 while ((current_ + 1) < end_) {
395 Char c = getNextChar();
396 if (c == '*' && *current_ == '/')
397 break;
398 }
399 return getNextChar() == '/';
400 }
401
readCppStyleComment()402 bool Reader::readCppStyleComment() {
403 while (current_ != end_) {
404 Char c = getNextChar();
405 if (c == '\n')
406 break;
407 if (c == '\r') {
408 // Consume DOS EOL. It will be normalized in addComment.
409 if (current_ != end_ && *current_ == '\n')
410 getNextChar();
411 // Break on Moc OS 9 EOL.
412 break;
413 }
414 }
415 return true;
416 }
417
// Advances current_ past the remainder of a number token: digits, an optional
// fractional part and an optional exponent. readToken() calls this after it
// has already consumed the first character (a digit or '-').
//
// The recurring expression `(current_ = p) < end_ ? *p++ : '\0'` first parks
// current_ on the character about to be examined and then fetches it (or '\0'
// at end of input). When scanning stops, current_ therefore points at the
// first character that is not part of the number.
void Reader::readNumber() {
  const char* p = current_;
  char c = '0'; // stopgap for already consumed character
  // integral part
  while (c >= '0' && c <= '9')
    c = (current_ = p) < end_ ? *p++ : '\0';
  // fractional part
  if (c == '.') {
    c = (current_ = p) < end_ ? *p++ : '\0';
    while (c >= '0' && c <= '9')
      c = (current_ = p) < end_ ? *p++ : '\0';
  }
  // exponential part
  if (c == 'e' || c == 'E') {
    c = (current_ = p) < end_ ? *p++ : '\0';
    // optional sign of the exponent
    if (c == '+' || c == '-')
      c = (current_ = p) < end_ ? *p++ : '\0';
    while (c >= '0' && c <= '9')
      c = (current_ = p) < end_ ? *p++ : '\0';
  }
}
439
readString()440 bool Reader::readString() {
441 Char c = '\0';
442 while (current_ != end_) {
443 c = getNextChar();
444 if (c == '\\')
445 getNextChar();
446 else if (c == '"')
447 break;
448 }
449 return c == '"';
450 }
451
readObject(Token & token)452 bool Reader::readObject(Token& token) {
453 Token tokenName;
454 String name;
455 Value init(objectValue);
456 currentValue().swapPayload(init);
457 currentValue().setOffsetStart(token.start_ - begin_);
458 while (readToken(tokenName)) {
459 bool initialTokenOk = true;
460 while (tokenName.type_ == tokenComment && initialTokenOk)
461 initialTokenOk = readToken(tokenName);
462 if (!initialTokenOk)
463 break;
464 if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object
465 return true;
466 name.clear();
467 if (tokenName.type_ == tokenString) {
468 if (!decodeString(tokenName, name))
469 return recoverFromError(tokenObjectEnd);
470 } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
471 Value numberName;
472 if (!decodeNumber(tokenName, numberName))
473 return recoverFromError(tokenObjectEnd);
474 name = String(numberName.asCString());
475 } else {
476 break;
477 }
478
479 Token colon;
480 if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
481 return addErrorAndRecover("Missing ':' after object member name", colon,
482 tokenObjectEnd);
483 }
484 Value& value = currentValue()[name];
485 nodes_.push(&value);
486 bool ok = readValue();
487 nodes_.pop();
488 if (!ok) // error already set
489 return recoverFromError(tokenObjectEnd);
490
491 Token comma;
492 if (!readToken(comma) ||
493 (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
494 comma.type_ != tokenComment)) {
495 return addErrorAndRecover("Missing ',' or '}' in object declaration",
496 comma, tokenObjectEnd);
497 }
498 bool finalizeTokenOk = true;
499 while (comma.type_ == tokenComment && finalizeTokenOk)
500 finalizeTokenOk = readToken(comma);
501 if (comma.type_ == tokenObjectEnd)
502 return true;
503 }
504 return addErrorAndRecover("Missing '}' or object member name", tokenName,
505 tokenObjectEnd);
506 }
507
readArray(Token & token)508 bool Reader::readArray(Token& token) {
509 Value init(arrayValue);
510 currentValue().swapPayload(init);
511 currentValue().setOffsetStart(token.start_ - begin_);
512 skipSpaces();
513 if (current_ != end_ && *current_ == ']') // empty array
514 {
515 Token endArray;
516 readToken(endArray);
517 return true;
518 }
519 int index = 0;
520 for (;;) {
521 Value& value = currentValue()[index++];
522 nodes_.push(&value);
523 bool ok = readValue();
524 nodes_.pop();
525 if (!ok) // error already set
526 return recoverFromError(tokenArrayEnd);
527
528 Token currentToken;
529 // Accept Comment after last item in the array.
530 ok = readToken(currentToken);
531 while (currentToken.type_ == tokenComment && ok) {
532 ok = readToken(currentToken);
533 }
534 bool badTokenType = (currentToken.type_ != tokenArraySeparator &&
535 currentToken.type_ != tokenArrayEnd);
536 if (!ok || badTokenType) {
537 return addErrorAndRecover("Missing ',' or ']' in array declaration",
538 currentToken, tokenArrayEnd);
539 }
540 if (currentToken.type_ == tokenArrayEnd)
541 break;
542 }
543 return true;
544 }
545
decodeNumber(Token & token)546 bool Reader::decodeNumber(Token& token) {
547 Value decoded;
548 if (!decodeNumber(token, decoded))
549 return false;
550 currentValue().swapPayload(decoded);
551 currentValue().setOffsetStart(token.start_ - begin_);
552 currentValue().setOffsetLimit(token.end_ - begin_);
553 return true;
554 }
555
// Decodes the text of `token` into `decoded`.
//
// The token is first interpreted as an integer. As soon as a non-digit
// character is met, or the value would not fit the largest supported integer
// type, decoding is delegated to decodeDouble(). Returns true on success.
//
// NOTE(review): a token consisting solely of '-' skips the digit loop and is
// decoded as 0 — confirm whether that is intended.
bool Reader::decodeNumber(Token& token, Value& decoded) {
  // Attempts to parse the number as an integer. If the number is
  // larger than the maximum supported value of an integer then
  // we decode the number as a double.
  Location current = token.start_;
  bool isNegative = *current == '-';
  if (isNegative)
    ++current;
  // TODO: Help the compiler do the div and mod at compile time or get rid of
  // them.
  // Largest magnitude that is representable for the detected sign.
  Value::LargestUInt maxIntegerValue =
      isNegative ? Value::LargestUInt(Value::maxLargestInt) + 1
                 : Value::maxLargestUInt;
  Value::LargestUInt threshold = maxIntegerValue / 10;
  Value::LargestUInt value = 0;
  while (current < token.end_) {
    Char c = *current++;
    if (c < '0' || c > '9')
      return decodeDouble(token, decoded);
    auto digit(static_cast<Value::UInt>(c - '0'));
    if (value >= threshold) {
      // We've hit or exceeded the max value divided by 10 (rounded down). If
      // a) we've only just touched the limit, b) this is the last digit, and
      // c) it's small enough to fit in that rounding delta, we're okay.
      // Otherwise treat this number as a double to avoid overflow.
      if (value > threshold || current != token.end_ ||
          digit > maxIntegerValue % 10) {
        return decodeDouble(token, decoded);
      }
    }
    value = value * 10 + digit;
  }
  // Pick the result representation: the most negative integer is handled
  // separately because its magnitude cannot be negated as a LargestInt.
  if (isNegative && value == maxIntegerValue)
    decoded = Value::minLargestInt;
  else if (isNegative)
    decoded = -Value::LargestInt(value);
  else if (value <= Value::LargestUInt(Value::maxInt))
    decoded = Value::LargestInt(value);
  else
    decoded = value;
  return true;
}
598
decodeDouble(Token & token)599 bool Reader::decodeDouble(Token& token) {
600 Value decoded;
601 if (!decodeDouble(token, decoded))
602 return false;
603 currentValue().swapPayload(decoded);
604 currentValue().setOffsetStart(token.start_ - begin_);
605 currentValue().setOffsetLimit(token.end_ - begin_);
606 return true;
607 }
608
decodeDouble(Token & token,Value & decoded)609 bool Reader::decodeDouble(Token& token, Value& decoded) {
610 double value = 0;
611 String buffer(token.start_, token.end_);
612 IStringStream is(buffer);
613 if (!(is >> value))
614 return addError(
615 "'" + String(token.start_, token.end_) + "' is not a number.", token);
616 decoded = value;
617 return true;
618 }
619
decodeString(Token & token)620 bool Reader::decodeString(Token& token) {
621 String decoded_string;
622 if (!decodeString(token, decoded_string))
623 return false;
624 Value decoded(decoded_string);
625 currentValue().swapPayload(decoded);
626 currentValue().setOffsetStart(token.start_ - begin_);
627 currentValue().setOffsetLimit(token.end_ - begin_);
628 return true;
629 }
630
decodeString(Token & token,String & decoded)631 bool Reader::decodeString(Token& token, String& decoded) {
632 decoded.reserve(static_cast<size_t>(token.end_ - token.start_ - 2));
633 Location current = token.start_ + 1; // skip '"'
634 Location end = token.end_ - 1; // do not include '"'
635 while (current != end) {
636 Char c = *current++;
637 if (c == '"')
638 break;
639 else if (c == '\\') {
640 if (current == end)
641 return addError("Empty escape sequence in string", token, current);
642 Char escape = *current++;
643 switch (escape) {
644 case '"':
645 decoded += '"';
646 break;
647 case '/':
648 decoded += '/';
649 break;
650 case '\\':
651 decoded += '\\';
652 break;
653 case 'b':
654 decoded += '\b';
655 break;
656 case 'f':
657 decoded += '\f';
658 break;
659 case 'n':
660 decoded += '\n';
661 break;
662 case 'r':
663 decoded += '\r';
664 break;
665 case 't':
666 decoded += '\t';
667 break;
668 case 'u': {
669 unsigned int unicode;
670 if (!decodeUnicodeCodePoint(token, current, end, unicode))
671 return false;
672 decoded += codePointToUTF8(unicode);
673 } break;
674 default:
675 return addError("Bad escape sequence in string", token, current);
676 }
677 } else {
678 decoded += c;
679 }
680 }
681 return true;
682 }
683
decodeUnicodeCodePoint(Token & token,Location & current,Location end,unsigned int & unicode)684 bool Reader::decodeUnicodeCodePoint(Token& token,
685 Location& current,
686 Location end,
687 unsigned int& unicode) {
688
689 if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
690 return false;
691 if (unicode >= 0xD800 && unicode <= 0xDBFF) {
692 // surrogate pairs
693 if (end - current < 6)
694 return addError(
695 "additional six characters expected to parse unicode surrogate pair.",
696 token, current);
697 if (*(current++) == '\\' && *(current++) == 'u') {
698 unsigned int surrogatePair;
699 if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
700 unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
701 } else
702 return false;
703 } else
704 return addError("expecting another \\u token to begin the second half of "
705 "a unicode surrogate pair",
706 token, current);
707 }
708 return true;
709 }
710
decodeUnicodeEscapeSequence(Token & token,Location & current,Location end,unsigned int & ret_unicode)711 bool Reader::decodeUnicodeEscapeSequence(Token& token,
712 Location& current,
713 Location end,
714 unsigned int& ret_unicode) {
715 if (end - current < 4)
716 return addError(
717 "Bad unicode escape sequence in string: four digits expected.", token,
718 current);
719 int unicode = 0;
720 for (int index = 0; index < 4; ++index) {
721 Char c = *current++;
722 unicode *= 16;
723 if (c >= '0' && c <= '9')
724 unicode += c - '0';
725 else if (c >= 'a' && c <= 'f')
726 unicode += c - 'a' + 10;
727 else if (c >= 'A' && c <= 'F')
728 unicode += c - 'A' + 10;
729 else
730 return addError(
731 "Bad unicode escape sequence in string: hexadecimal digit expected.",
732 token, current);
733 }
734 ret_unicode = static_cast<unsigned int>(unicode);
735 return true;
736 }
737
addError(const String & message,Token & token,Location extra)738 bool Reader::addError(const String& message, Token& token, Location extra) {
739 ErrorInfo info;
740 info.token_ = token;
741 info.message_ = message;
742 info.extra_ = extra;
743 errors_.push_back(info);
744 return false;
745 }
746
recoverFromError(TokenType skipUntilToken)747 bool Reader::recoverFromError(TokenType skipUntilToken) {
748 size_t const errorCount = errors_.size();
749 Token skip;
750 for (;;) {
751 if (!readToken(skip))
752 errors_.resize(errorCount); // discard errors caused by recovery
753 if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
754 break;
755 }
756 errors_.resize(errorCount);
757 return false;
758 }
759
addErrorAndRecover(const String & message,Token & token,TokenType skipUntilToken)760 bool Reader::addErrorAndRecover(const String& message,
761 Token& token,
762 TokenType skipUntilToken) {
763 addError(message, token);
764 return recoverFromError(skipUntilToken);
765 }
766
currentValue()767 Value& Reader::currentValue() { return *(nodes_.top()); }
768
getNextChar()769 Reader::Char Reader::getNextChar() {
770 if (current_ == end_)
771 return 0;
772 return *current_++;
773 }
774
getLocationLineAndColumn(Location location,int & line,int & column) const775 void Reader::getLocationLineAndColumn(Location location,
776 int& line,
777 int& column) const {
778 Location current = begin_;
779 Location lastLineStart = current;
780 line = 0;
781 while (current < location && current != end_) {
782 Char c = *current++;
783 if (c == '\r') {
784 if (*current == '\n')
785 ++current;
786 lastLineStart = current;
787 ++line;
788 } else if (c == '\n') {
789 lastLineStart = current;
790 ++line;
791 }
792 }
793 // column & line start at 1
794 column = int(location - lastLineStart) + 1;
795 ++line;
796 }
797
getLocationLineAndColumn(Location location) const798 String Reader::getLocationLineAndColumn(Location location) const {
799 int line, column;
800 getLocationLineAndColumn(location, line, column);
801 char buffer[18 + 16 + 16 + 1];
802 jsoncpp_snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
803 return buffer;
804 }
805
806 // Deprecated. Preserved for backward compatibility
getFormatedErrorMessages() const807 String Reader::getFormatedErrorMessages() const {
808 return getFormattedErrorMessages();
809 }
810
getFormattedErrorMessages() const811 String Reader::getFormattedErrorMessages() const {
812 String formattedMessage;
813 for (const auto& error : errors_) {
814 formattedMessage +=
815 "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
816 formattedMessage += " " + error.message_ + "\n";
817 if (error.extra_)
818 formattedMessage +=
819 "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
820 }
821 return formattedMessage;
822 }
823
getStructuredErrors() const824 std::vector<Reader::StructuredError> Reader::getStructuredErrors() const {
825 std::vector<Reader::StructuredError> allErrors;
826 for (const auto& error : errors_) {
827 Reader::StructuredError structured;
828 structured.offset_start = error.token_.start_ - begin_;
829 structured.offset_limit = error.token_.end_ - begin_;
830 structured.message = error.message_;
831 allErrors.push_back(structured);
832 }
833 return allErrors;
834 }
835
pushError(const Value & value,const String & message)836 bool Reader::pushError(const Value& value, const String& message) {
837 ptrdiff_t const length = end_ - begin_;
838 if (value.getOffsetStart() > length || value.getOffsetLimit() > length)
839 return false;
840 Token token;
841 token.type_ = tokenError;
842 token.start_ = begin_ + value.getOffsetStart();
843 token.end_ = begin_ + value.getOffsetLimit();
844 ErrorInfo info;
845 info.token_ = token;
846 info.message_ = message;
847 info.extra_ = nullptr;
848 errors_.push_back(info);
849 return true;
850 }
851
pushError(const Value & value,const String & message,const Value & extra)852 bool Reader::pushError(const Value& value,
853 const String& message,
854 const Value& extra) {
855 ptrdiff_t const length = end_ - begin_;
856 if (value.getOffsetStart() > length || value.getOffsetLimit() > length ||
857 extra.getOffsetLimit() > length)
858 return false;
859 Token token;
860 token.type_ = tokenError;
861 token.start_ = begin_ + value.getOffsetStart();
862 token.end_ = begin_ + value.getOffsetLimit();
863 ErrorInfo info;
864 info.token_ = token;
865 info.message_ = message;
866 info.extra_ = begin_ + extra.getOffsetStart();
867 errors_.push_back(info);
868 return true;
869 }
870
good() const871 bool Reader::good() const { return errors_.empty(); }
872
873 // Originally copied from the Features class (now deprecated), used internally
874 // for features implementation.
// Parsing options consulted by OurReader.
//
// Every member carries a default initializer, so a default-constructed
// object has all flags off and a stack limit of 0 instead of indeterminate
// values.
class OurFeatures {
public:
  // Returns a value-initialized feature set.
  static OurFeatures all();
  bool allowComments_ = false;
  bool strictRoot_ = false;
  bool allowDroppedNullPlaceholders_ = false;
  bool allowNumericKeys_ = false;
  bool allowSingleQuotes_ = false;
  bool failIfExtra_ = false;
  bool rejectDupKeys_ = false;
  bool allowSpecialFloats_ = false;
  size_t stackLimit_ = 0;
}; // OurFeatures
888
all()889 OurFeatures OurFeatures::all() { return {}; }
890
// Implementation of class OurReader
// /////////////////////////////////
893
894 // Originally copied from the Reader class (now deprecated), used internally
895 // for implementing JSON reading.
896 class OurReader {
897 public:
898 typedef char Char;
899 typedef const Char* Location;
900 struct StructuredError {
901 ptrdiff_t offset_start;
902 ptrdiff_t offset_limit;
903 String message;
904 };
905
906 OurReader(OurFeatures const& features);
907 bool parse(const char* beginDoc,
908 const char* endDoc,
909 Value& root,
910 bool collectComments = true);
911 String getFormattedErrorMessages() const;
912 std::vector<StructuredError> getStructuredErrors() const;
913 bool pushError(const Value& value, const String& message);
914 bool pushError(const Value& value, const String& message, const Value& extra);
915 bool good() const;
916
917 private:
918 OurReader(OurReader const&); // no impl
919 void operator=(OurReader const&); // no impl
920
921 enum TokenType {
922 tokenEndOfStream = 0,
923 tokenObjectBegin,
924 tokenObjectEnd,
925 tokenArrayBegin,
926 tokenArrayEnd,
927 tokenString,
928 tokenNumber,
929 tokenTrue,
930 tokenFalse,
931 tokenNull,
932 tokenNaN,
933 tokenPosInf,
934 tokenNegInf,
935 tokenArraySeparator,
936 tokenMemberSeparator,
937 tokenComment,
938 tokenError
939 };
940
941 class Token {
942 public:
943 TokenType type_;
944 Location start_;
945 Location end_;
946 };
947
948 class ErrorInfo {
949 public:
950 Token token_;
951 String message_;
952 Location extra_;
953 };
954
955 typedef std::deque<ErrorInfo> Errors;
956
957 bool readToken(Token& token);
958 void skipSpaces();
959 bool match(Location pattern, int patternLength);
960 bool readComment();
961 bool readCStyleComment();
962 bool readCppStyleComment();
963 bool readString();
964 bool readStringSingleQuote();
965 bool readNumber(bool checkInf);
966 bool readValue();
967 bool readObject(Token& token);
968 bool readArray(Token& token);
969 bool decodeNumber(Token& token);
970 bool decodeNumber(Token& token, Value& decoded);
971 bool decodeString(Token& token);
972 bool decodeString(Token& token, String& decoded);
973 bool decodeDouble(Token& token);
974 bool decodeDouble(Token& token, Value& decoded);
975 bool decodeUnicodeCodePoint(Token& token,
976 Location& current,
977 Location end,
978 unsigned int& unicode);
979 bool decodeUnicodeEscapeSequence(Token& token,
980 Location& current,
981 Location end,
982 unsigned int& unicode);
983 bool addError(const String& message, Token& token, Location extra = nullptr);
984 bool recoverFromError(TokenType skipUntilToken);
985 bool addErrorAndRecover(const String& message,
986 Token& token,
987 TokenType skipUntilToken);
988 void skipUntilSpace();
989 Value& currentValue();
990 Char getNextChar();
991 void
992 getLocationLineAndColumn(Location location, int& line, int& column) const;
993 String getLocationLineAndColumn(Location location) const;
994 void addComment(Location begin, Location end, CommentPlacement placement);
995 void skipCommentTokens(Token& token);
996
997 static String normalizeEOL(Location begin, Location end);
998 static bool containsNewLine(Location begin, Location end);
999
1000 typedef std::stack<Value*> Nodes;
1001 Nodes nodes_;
1002 Errors errors_;
1003 String document_;
1004 Location begin_;
1005 Location end_;
1006 Location current_;
1007 Location lastValueEnd_;
1008 Value* lastValue_;
1009 String commentsBefore_;
1010
1011 OurFeatures const features_;
1012 bool collectComments_;
1013 }; // OurReader
1014
// complete copy of Reader impl, for OurReader
1016
containsNewLine(OurReader::Location begin,OurReader::Location end)1017 bool OurReader::containsNewLine(OurReader::Location begin,
1018 OurReader::Location end) {
1019 for (; begin < end; ++begin)
1020 if (*begin == '\n' || *begin == '\r')
1021 return true;
1022 return false;
1023 }
1024
OurReader(OurFeatures const & features)1025 OurReader::OurReader(OurFeatures const& features)
1026 : errors_(), document_(), begin_(), end_(), current_(), lastValueEnd_(),
1027 lastValue_(), commentsBefore_(), features_(features), collectComments_() {
1028 }
1029
parse(const char * beginDoc,const char * endDoc,Value & root,bool collectComments)1030 bool OurReader::parse(const char* beginDoc,
1031 const char* endDoc,
1032 Value& root,
1033 bool collectComments) {
1034 if (!features_.allowComments_) {
1035 collectComments = false;
1036 }
1037
1038 begin_ = beginDoc;
1039 end_ = endDoc;
1040 collectComments_ = collectComments;
1041 current_ = begin_;
1042 lastValueEnd_ = nullptr;
1043 lastValue_ = nullptr;
1044 commentsBefore_.clear();
1045 errors_.clear();
1046 while (!nodes_.empty())
1047 nodes_.pop();
1048 nodes_.push(&root);
1049
1050 bool successful = readValue();
1051 nodes_.pop();
1052 Token token;
1053 skipCommentTokens(token);
1054 if (features_.failIfExtra_) {
1055 if ((features_.strictRoot_ || token.type_ != tokenError) &&
1056 token.type_ != tokenEndOfStream) {
1057 addError("Extra non-whitespace after JSON value.", token);
1058 return false;
1059 }
1060 }
1061 if (collectComments_ && !commentsBefore_.empty())
1062 root.setComment(commentsBefore_, commentAfter);
1063 if (features_.strictRoot_) {
1064 if (!root.isArray() && !root.isObject()) {
1065 // Set error location to start of doc, ideally should be first token found
1066 // in doc
1067 token.type_ = tokenError;
1068 token.start_ = beginDoc;
1069 token.end_ = endDoc;
1070 addError(
1071 "A valid JSON document must be either an array or an object value.",
1072 token);
1073 return false;
1074 }
1075 }
1076 return successful;
1077 }
1078
readValue()1079 bool OurReader::readValue() {
1080 // To preserve the old behaviour we cast size_t to int.
1081 if (nodes_.size() > features_.stackLimit_)
1082 throwRuntimeError("Exceeded stackLimit in readValue().");
1083 Token token;
1084 skipCommentTokens(token);
1085 bool successful = true;
1086
1087 if (collectComments_ && !commentsBefore_.empty()) {
1088 currentValue().setComment(commentsBefore_, commentBefore);
1089 commentsBefore_.clear();
1090 }
1091
1092 switch (token.type_) {
1093 case tokenObjectBegin:
1094 successful = readObject(token);
1095 currentValue().setOffsetLimit(current_ - begin_);
1096 break;
1097 case tokenArrayBegin:
1098 successful = readArray(token);
1099 currentValue().setOffsetLimit(current_ - begin_);
1100 break;
1101 case tokenNumber:
1102 successful = decodeNumber(token);
1103 break;
1104 case tokenString:
1105 successful = decodeString(token);
1106 break;
1107 case tokenTrue: {
1108 Value v(true);
1109 currentValue().swapPayload(v);
1110 currentValue().setOffsetStart(token.start_ - begin_);
1111 currentValue().setOffsetLimit(token.end_ - begin_);
1112 } break;
1113 case tokenFalse: {
1114 Value v(false);
1115 currentValue().swapPayload(v);
1116 currentValue().setOffsetStart(token.start_ - begin_);
1117 currentValue().setOffsetLimit(token.end_ - begin_);
1118 } break;
1119 case tokenNull: {
1120 Value v;
1121 currentValue().swapPayload(v);
1122 currentValue().setOffsetStart(token.start_ - begin_);
1123 currentValue().setOffsetLimit(token.end_ - begin_);
1124 } break;
1125 case tokenNaN: {
1126 Value v(std::numeric_limits<double>::quiet_NaN());
1127 currentValue().swapPayload(v);
1128 currentValue().setOffsetStart(token.start_ - begin_);
1129 currentValue().setOffsetLimit(token.end_ - begin_);
1130 } break;
1131 case tokenPosInf: {
1132 Value v(std::numeric_limits<double>::infinity());
1133 currentValue().swapPayload(v);
1134 currentValue().setOffsetStart(token.start_ - begin_);
1135 currentValue().setOffsetLimit(token.end_ - begin_);
1136 } break;
1137 case tokenNegInf: {
1138 Value v(-std::numeric_limits<double>::infinity());
1139 currentValue().swapPayload(v);
1140 currentValue().setOffsetStart(token.start_ - begin_);
1141 currentValue().setOffsetLimit(token.end_ - begin_);
1142 } break;
1143 case tokenArraySeparator:
1144 case tokenObjectEnd:
1145 case tokenArrayEnd:
1146 if (features_.allowDroppedNullPlaceholders_) {
1147 // "Un-read" the current token and mark the current value as a null
1148 // token.
1149 current_--;
1150 Value v;
1151 currentValue().swapPayload(v);
1152 currentValue().setOffsetStart(current_ - begin_ - 1);
1153 currentValue().setOffsetLimit(current_ - begin_);
1154 break;
1155 } // else, fall through ...
1156 default:
1157 currentValue().setOffsetStart(token.start_ - begin_);
1158 currentValue().setOffsetLimit(token.end_ - begin_);
1159 return addError("Syntax error: value, object or array expected.", token);
1160 }
1161
1162 if (collectComments_) {
1163 lastValueEnd_ = current_;
1164 lastValue_ = ¤tValue();
1165 }
1166
1167 return successful;
1168 }
1169
skipCommentTokens(Token & token)1170 void OurReader::skipCommentTokens(Token& token) {
1171 if (features_.allowComments_) {
1172 do {
1173 readToken(token);
1174 } while (token.type_ == tokenComment);
1175 } else {
1176 readToken(token);
1177 }
1178 }
1179
readToken(Token & token)1180 bool OurReader::readToken(Token& token) {
1181 skipSpaces();
1182 token.start_ = current_;
1183 Char c = getNextChar();
1184 bool ok = true;
1185 switch (c) {
1186 case '{':
1187 token.type_ = tokenObjectBegin;
1188 break;
1189 case '}':
1190 token.type_ = tokenObjectEnd;
1191 break;
1192 case '[':
1193 token.type_ = tokenArrayBegin;
1194 break;
1195 case ']':
1196 token.type_ = tokenArrayEnd;
1197 break;
1198 case '"':
1199 token.type_ = tokenString;
1200 ok = readString();
1201 break;
1202 case '\'':
1203 if (features_.allowSingleQuotes_) {
1204 token.type_ = tokenString;
1205 ok = readStringSingleQuote();
1206 break;
1207 } // else fall through
1208 case '/':
1209 token.type_ = tokenComment;
1210 ok = readComment();
1211 break;
1212 case '0':
1213 case '1':
1214 case '2':
1215 case '3':
1216 case '4':
1217 case '5':
1218 case '6':
1219 case '7':
1220 case '8':
1221 case '9':
1222 token.type_ = tokenNumber;
1223 readNumber(false);
1224 break;
1225 case '-':
1226 if (readNumber(true)) {
1227 token.type_ = tokenNumber;
1228 } else {
1229 token.type_ = tokenNegInf;
1230 ok = features_.allowSpecialFloats_ && match("nfinity", 7);
1231 }
1232 break;
1233 case 't':
1234 token.type_ = tokenTrue;
1235 ok = match("rue", 3);
1236 break;
1237 case 'f':
1238 token.type_ = tokenFalse;
1239 ok = match("alse", 4);
1240 break;
1241 case 'n':
1242 token.type_ = tokenNull;
1243 ok = match("ull", 3);
1244 break;
1245 case 'N':
1246 if (features_.allowSpecialFloats_) {
1247 token.type_ = tokenNaN;
1248 ok = match("aN", 2);
1249 } else {
1250 ok = false;
1251 }
1252 break;
1253 case 'I':
1254 if (features_.allowSpecialFloats_) {
1255 token.type_ = tokenPosInf;
1256 ok = match("nfinity", 7);
1257 } else {
1258 ok = false;
1259 }
1260 break;
1261 case ',':
1262 token.type_ = tokenArraySeparator;
1263 break;
1264 case ':':
1265 token.type_ = tokenMemberSeparator;
1266 break;
1267 case 0:
1268 token.type_ = tokenEndOfStream;
1269 break;
1270 default:
1271 ok = false;
1272 break;
1273 }
1274 if (!ok)
1275 token.type_ = tokenError;
1276 token.end_ = current_;
1277 return true;
1278 }
1279
skipSpaces()1280 void OurReader::skipSpaces() {
1281 while (current_ != end_) {
1282 Char c = *current_;
1283 if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
1284 ++current_;
1285 else
1286 break;
1287 }
1288 }
1289
match(Location pattern,int patternLength)1290 bool OurReader::match(Location pattern, int patternLength) {
1291 if (end_ - current_ < patternLength)
1292 return false;
1293 int index = patternLength;
1294 while (index--)
1295 if (current_[index] != pattern[index])
1296 return false;
1297 current_ += patternLength;
1298 return true;
1299 }
1300
readComment()1301 bool OurReader::readComment() {
1302 Location commentBegin = current_ - 1;
1303 Char c = getNextChar();
1304 bool successful = false;
1305 if (c == '*')
1306 successful = readCStyleComment();
1307 else if (c == '/')
1308 successful = readCppStyleComment();
1309 if (!successful)
1310 return false;
1311
1312 if (collectComments_) {
1313 CommentPlacement placement = commentBefore;
1314 if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
1315 if (c != '*' || !containsNewLine(commentBegin, current_))
1316 placement = commentAfterOnSameLine;
1317 }
1318
1319 addComment(commentBegin, current_, placement);
1320 }
1321 return true;
1322 }
1323
normalizeEOL(OurReader::Location begin,OurReader::Location end)1324 String OurReader::normalizeEOL(OurReader::Location begin,
1325 OurReader::Location end) {
1326 String normalized;
1327 normalized.reserve(static_cast<size_t>(end - begin));
1328 OurReader::Location current = begin;
1329 while (current != end) {
1330 char c = *current++;
1331 if (c == '\r') {
1332 if (current != end && *current == '\n')
1333 // convert dos EOL
1334 ++current;
1335 // convert Mac EOL
1336 normalized += '\n';
1337 } else {
1338 normalized += c;
1339 }
1340 }
1341 return normalized;
1342 }
1343
addComment(Location begin,Location end,CommentPlacement placement)1344 void OurReader::addComment(Location begin,
1345 Location end,
1346 CommentPlacement placement) {
1347 assert(collectComments_);
1348 const String& normalized = normalizeEOL(begin, end);
1349 if (placement == commentAfterOnSameLine) {
1350 assert(lastValue_ != nullptr);
1351 lastValue_->setComment(normalized, placement);
1352 } else {
1353 commentsBefore_ += normalized;
1354 }
1355 }
1356
readCStyleComment()1357 bool OurReader::readCStyleComment() {
1358 while ((current_ + 1) < end_) {
1359 Char c = getNextChar();
1360 if (c == '*' && *current_ == '/')
1361 break;
1362 }
1363 return getNextChar() == '/';
1364 }
1365
readCppStyleComment()1366 bool OurReader::readCppStyleComment() {
1367 while (current_ != end_) {
1368 Char c = getNextChar();
1369 if (c == '\n')
1370 break;
1371 if (c == '\r') {
1372 // Consume DOS EOL. It will be normalized in addComment.
1373 if (current_ != end_ && *current_ == '\n')
1374 getNextChar();
1375 // Break on Moc OS 9 EOL.
1376 break;
1377 }
1378 }
1379 return true;
1380 }
1381
readNumber(bool checkInf)1382 bool OurReader::readNumber(bool checkInf) {
1383 const char* p = current_;
1384 if (checkInf && p != end_ && *p == 'I') {
1385 current_ = ++p;
1386 return false;
1387 }
1388 char c = '0'; // stopgap for already consumed character
1389 // integral part
1390 while (c >= '0' && c <= '9')
1391 c = (current_ = p) < end_ ? *p++ : '\0';
1392 // fractional part
1393 if (c == '.') {
1394 c = (current_ = p) < end_ ? *p++ : '\0';
1395 while (c >= '0' && c <= '9')
1396 c = (current_ = p) < end_ ? *p++ : '\0';
1397 }
1398 // exponential part
1399 if (c == 'e' || c == 'E') {
1400 c = (current_ = p) < end_ ? *p++ : '\0';
1401 if (c == '+' || c == '-')
1402 c = (current_ = p) < end_ ? *p++ : '\0';
1403 while (c >= '0' && c <= '9')
1404 c = (current_ = p) < end_ ? *p++ : '\0';
1405 }
1406 return true;
1407 }
readString()1408 bool OurReader::readString() {
1409 Char c = 0;
1410 while (current_ != end_) {
1411 c = getNextChar();
1412 if (c == '\\')
1413 getNextChar();
1414 else if (c == '"')
1415 break;
1416 }
1417 return c == '"';
1418 }
1419
readStringSingleQuote()1420 bool OurReader::readStringSingleQuote() {
1421 Char c = 0;
1422 while (current_ != end_) {
1423 c = getNextChar();
1424 if (c == '\\')
1425 getNextChar();
1426 else if (c == '\'')
1427 break;
1428 }
1429 return c == '\'';
1430 }
1431
readObject(Token & token)1432 bool OurReader::readObject(Token& token) {
1433 Token tokenName;
1434 String name;
1435 Value init(objectValue);
1436 currentValue().swapPayload(init);
1437 currentValue().setOffsetStart(token.start_ - begin_);
1438 while (readToken(tokenName)) {
1439 bool initialTokenOk = true;
1440 while (tokenName.type_ == tokenComment && initialTokenOk)
1441 initialTokenOk = readToken(tokenName);
1442 if (!initialTokenOk)
1443 break;
1444 if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object
1445 return true;
1446 name.clear();
1447 if (tokenName.type_ == tokenString) {
1448 if (!decodeString(tokenName, name))
1449 return recoverFromError(tokenObjectEnd);
1450 } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
1451 Value numberName;
1452 if (!decodeNumber(tokenName, numberName))
1453 return recoverFromError(tokenObjectEnd);
1454 name = numberName.asString();
1455 } else {
1456 break;
1457 }
1458 if (name.length() >= (1U << 30))
1459 throwRuntimeError("keylength >= 2^30");
1460 if (features_.rejectDupKeys_ && currentValue().isMember(name)) {
1461 String msg = "Duplicate key: '" + name + "'";
1462 return addErrorAndRecover(msg, tokenName, tokenObjectEnd);
1463 }
1464
1465 Token colon;
1466 if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
1467 return addErrorAndRecover("Missing ':' after object member name", colon,
1468 tokenObjectEnd);
1469 }
1470 Value& value = currentValue()[name];
1471 nodes_.push(&value);
1472 bool ok = readValue();
1473 nodes_.pop();
1474 if (!ok) // error already set
1475 return recoverFromError(tokenObjectEnd);
1476
1477 Token comma;
1478 if (!readToken(comma) ||
1479 (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
1480 comma.type_ != tokenComment)) {
1481 return addErrorAndRecover("Missing ',' or '}' in object declaration",
1482 comma, tokenObjectEnd);
1483 }
1484 bool finalizeTokenOk = true;
1485 while (comma.type_ == tokenComment && finalizeTokenOk)
1486 finalizeTokenOk = readToken(comma);
1487 if (comma.type_ == tokenObjectEnd)
1488 return true;
1489 }
1490 return addErrorAndRecover("Missing '}' or object member name", tokenName,
1491 tokenObjectEnd);
1492 }
1493
readArray(Token & token)1494 bool OurReader::readArray(Token& token) {
1495 Value init(arrayValue);
1496 currentValue().swapPayload(init);
1497 currentValue().setOffsetStart(token.start_ - begin_);
1498 skipSpaces();
1499 if (current_ != end_ && *current_ == ']') // empty array
1500 {
1501 Token endArray;
1502 readToken(endArray);
1503 return true;
1504 }
1505 int index = 0;
1506 for (;;) {
1507 Value& value = currentValue()[index++];
1508 nodes_.push(&value);
1509 bool ok = readValue();
1510 nodes_.pop();
1511 if (!ok) // error already set
1512 return recoverFromError(tokenArrayEnd);
1513
1514 Token currentToken;
1515 // Accept Comment after last item in the array.
1516 ok = readToken(currentToken);
1517 while (currentToken.type_ == tokenComment && ok) {
1518 ok = readToken(currentToken);
1519 }
1520 bool badTokenType = (currentToken.type_ != tokenArraySeparator &&
1521 currentToken.type_ != tokenArrayEnd);
1522 if (!ok || badTokenType) {
1523 return addErrorAndRecover("Missing ',' or ']' in array declaration",
1524 currentToken, tokenArrayEnd);
1525 }
1526 if (currentToken.type_ == tokenArrayEnd)
1527 break;
1528 }
1529 return true;
1530 }
1531
decodeNumber(Token & token)1532 bool OurReader::decodeNumber(Token& token) {
1533 Value decoded;
1534 if (!decodeNumber(token, decoded))
1535 return false;
1536 currentValue().swapPayload(decoded);
1537 currentValue().setOffsetStart(token.start_ - begin_);
1538 currentValue().setOffsetLimit(token.end_ - begin_);
1539 return true;
1540 }
1541
decodeNumber(Token & token,Value & decoded)1542 bool OurReader::decodeNumber(Token& token, Value& decoded) {
1543 // Attempts to parse the number as an integer. If the number is
1544 // larger than the maximum supported value of an integer then
1545 // we decode the number as a double.
1546 Location current = token.start_;
1547 bool isNegative = *current == '-';
1548 if (isNegative)
1549 ++current;
1550
1551 // TODO(issue #960): Change to constexpr
1552 static const auto positive_threshold = Value::maxLargestUInt / 10;
1553 static const auto positive_last_digit = Value::maxLargestUInt % 10;
1554 static const auto negative_threshold =
1555 Value::LargestUInt(Value::minLargestInt) / 10;
1556 static const auto negative_last_digit =
1557 Value::LargestUInt(Value::minLargestInt) % 10;
1558
1559 const auto threshold = isNegative ? negative_threshold : positive_threshold;
1560 const auto last_digit =
1561 isNegative ? negative_last_digit : positive_last_digit;
1562
1563 Value::LargestUInt value = 0;
1564 while (current < token.end_) {
1565 Char c = *current++;
1566 if (c < '0' || c > '9')
1567 return decodeDouble(token, decoded);
1568
1569 const auto digit(static_cast<Value::UInt>(c - '0'));
1570 if (value >= threshold) {
1571 // We've hit or exceeded the max value divided by 10 (rounded down). If
1572 // a) we've only just touched the limit, meaing value == threshold,
1573 // b) this is the last digit, or
1574 // c) it's small enough to fit in that rounding delta, we're okay.
1575 // Otherwise treat this number as a double to avoid overflow.
1576 if (value > threshold || current != token.end_ || digit > last_digit) {
1577 return decodeDouble(token, decoded);
1578 }
1579 }
1580 value = value * 10 + digit;
1581 }
1582
1583 if (isNegative)
1584 decoded = -Value::LargestInt(value);
1585 else if (value <= Value::LargestUInt(Value::maxLargestInt))
1586 decoded = Value::LargestInt(value);
1587 else
1588 decoded = value;
1589
1590 return true;
1591 }
1592
decodeDouble(Token & token)1593 bool OurReader::decodeDouble(Token& token) {
1594 Value decoded;
1595 if (!decodeDouble(token, decoded))
1596 return false;
1597 currentValue().swapPayload(decoded);
1598 currentValue().setOffsetStart(token.start_ - begin_);
1599 currentValue().setOffsetLimit(token.end_ - begin_);
1600 return true;
1601 }
1602
decodeDouble(Token & token,Value & decoded)1603 bool OurReader::decodeDouble(Token& token, Value& decoded) {
1604 double value = 0;
1605 const int bufferSize = 32;
1606 int count;
1607 ptrdiff_t const length = token.end_ - token.start_;
1608
1609 // Sanity check to avoid buffer overflow exploits.
1610 if (length < 0) {
1611 return addError("Unable to parse token length", token);
1612 }
1613 auto const ulength = static_cast<size_t>(length);
1614
1615 // Avoid using a string constant for the format control string given to
1616 // sscanf, as this can cause hard to debug crashes on OS X. See here for more
1617 // info:
1618 //
1619 // http://developer.apple.com/library/mac/#DOCUMENTATION/DeveloperTools/gcc-4.0.1/gcc/Incompatibilities.html
1620 char format[] = "%lf";
1621
1622 if (length <= bufferSize) {
1623 Char buffer[bufferSize + 1];
1624 memcpy(buffer, token.start_, ulength);
1625 buffer[length] = 0;
1626 fixNumericLocaleInput(buffer, buffer + length);
1627 count = sscanf(buffer, format, &value);
1628 } else {
1629 String buffer(token.start_, token.end_);
1630 count = sscanf(buffer.c_str(), format, &value);
1631 }
1632
1633 if (count != 1)
1634 return addError(
1635 "'" + String(token.start_, token.end_) + "' is not a number.", token);
1636 decoded = value;
1637 return true;
1638 }
1639
decodeString(Token & token)1640 bool OurReader::decodeString(Token& token) {
1641 String decoded_string;
1642 if (!decodeString(token, decoded_string))
1643 return false;
1644 Value decoded(decoded_string);
1645 currentValue().swapPayload(decoded);
1646 currentValue().setOffsetStart(token.start_ - begin_);
1647 currentValue().setOffsetLimit(token.end_ - begin_);
1648 return true;
1649 }
1650
decodeString(Token & token,String & decoded)1651 bool OurReader::decodeString(Token& token, String& decoded) {
1652 decoded.reserve(static_cast<size_t>(token.end_ - token.start_ - 2));
1653 Location current = token.start_ + 1; // skip '"'
1654 Location end = token.end_ - 1; // do not include '"'
1655 while (current != end) {
1656 Char c = *current++;
1657 if (c == '"')
1658 break;
1659 else if (c == '\\') {
1660 if (current == end)
1661 return addError("Empty escape sequence in string", token, current);
1662 Char escape = *current++;
1663 switch (escape) {
1664 case '"':
1665 decoded += '"';
1666 break;
1667 case '/':
1668 decoded += '/';
1669 break;
1670 case '\\':
1671 decoded += '\\';
1672 break;
1673 case 'b':
1674 decoded += '\b';
1675 break;
1676 case 'f':
1677 decoded += '\f';
1678 break;
1679 case 'n':
1680 decoded += '\n';
1681 break;
1682 case 'r':
1683 decoded += '\r';
1684 break;
1685 case 't':
1686 decoded += '\t';
1687 break;
1688 case 'u': {
1689 unsigned int unicode;
1690 if (!decodeUnicodeCodePoint(token, current, end, unicode))
1691 return false;
1692 decoded += codePointToUTF8(unicode);
1693 } break;
1694 default:
1695 return addError("Bad escape sequence in string", token, current);
1696 }
1697 } else {
1698 decoded += c;
1699 }
1700 }
1701 return true;
1702 }
1703
decodeUnicodeCodePoint(Token & token,Location & current,Location end,unsigned int & unicode)1704 bool OurReader::decodeUnicodeCodePoint(Token& token,
1705 Location& current,
1706 Location end,
1707 unsigned int& unicode) {
1708
1709 if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
1710 return false;
1711 if (unicode >= 0xD800 && unicode <= 0xDBFF) {
1712 // surrogate pairs
1713 if (end - current < 6)
1714 return addError(
1715 "additional six characters expected to parse unicode surrogate pair.",
1716 token, current);
1717 if (*(current++) == '\\' && *(current++) == 'u') {
1718 unsigned int surrogatePair;
1719 if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
1720 unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
1721 } else
1722 return false;
1723 } else
1724 return addError("expecting another \\u token to begin the second half of "
1725 "a unicode surrogate pair",
1726 token, current);
1727 }
1728 return true;
1729 }
1730
decodeUnicodeEscapeSequence(Token & token,Location & current,Location end,unsigned int & ret_unicode)1731 bool OurReader::decodeUnicodeEscapeSequence(Token& token,
1732 Location& current,
1733 Location end,
1734 unsigned int& ret_unicode) {
1735 if (end - current < 4)
1736 return addError(
1737 "Bad unicode escape sequence in string: four digits expected.", token,
1738 current);
1739 int unicode = 0;
1740 for (int index = 0; index < 4; ++index) {
1741 Char c = *current++;
1742 unicode *= 16;
1743 if (c >= '0' && c <= '9')
1744 unicode += c - '0';
1745 else if (c >= 'a' && c <= 'f')
1746 unicode += c - 'a' + 10;
1747 else if (c >= 'A' && c <= 'F')
1748 unicode += c - 'A' + 10;
1749 else
1750 return addError(
1751 "Bad unicode escape sequence in string: hexadecimal digit expected.",
1752 token, current);
1753 }
1754 ret_unicode = static_cast<unsigned int>(unicode);
1755 return true;
1756 }
1757
addError(const String & message,Token & token,Location extra)1758 bool OurReader::addError(const String& message, Token& token, Location extra) {
1759 ErrorInfo info;
1760 info.token_ = token;
1761 info.message_ = message;
1762 info.extra_ = extra;
1763 errors_.push_back(info);
1764 return false;
1765 }
1766
recoverFromError(TokenType skipUntilToken)1767 bool OurReader::recoverFromError(TokenType skipUntilToken) {
1768 size_t errorCount = errors_.size();
1769 Token skip;
1770 for (;;) {
1771 if (!readToken(skip))
1772 errors_.resize(errorCount); // discard errors caused by recovery
1773 if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
1774 break;
1775 }
1776 errors_.resize(errorCount);
1777 return false;
1778 }
1779
addErrorAndRecover(const String & message,Token & token,TokenType skipUntilToken)1780 bool OurReader::addErrorAndRecover(const String& message,
1781 Token& token,
1782 TokenType skipUntilToken) {
1783 addError(message, token);
1784 return recoverFromError(skipUntilToken);
1785 }
1786
currentValue()1787 Value& OurReader::currentValue() { return *(nodes_.top()); }
1788
getNextChar()1789 OurReader::Char OurReader::getNextChar() {
1790 if (current_ == end_)
1791 return 0;
1792 return *current_++;
1793 }
1794
getLocationLineAndColumn(Location location,int & line,int & column) const1795 void OurReader::getLocationLineAndColumn(Location location,
1796 int& line,
1797 int& column) const {
1798 Location current = begin_;
1799 Location lastLineStart = current;
1800 line = 0;
1801 while (current < location && current != end_) {
1802 Char c = *current++;
1803 if (c == '\r') {
1804 if (*current == '\n')
1805 ++current;
1806 lastLineStart = current;
1807 ++line;
1808 } else if (c == '\n') {
1809 lastLineStart = current;
1810 ++line;
1811 }
1812 }
1813 // column & line start at 1
1814 column = int(location - lastLineStart) + 1;
1815 ++line;
1816 }
1817
getLocationLineAndColumn(Location location) const1818 String OurReader::getLocationLineAndColumn(Location location) const {
1819 int line, column;
1820 getLocationLineAndColumn(location, line, column);
1821 char buffer[18 + 16 + 16 + 1];
1822 jsoncpp_snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
1823 return buffer;
1824 }
1825
getFormattedErrorMessages() const1826 String OurReader::getFormattedErrorMessages() const {
1827 String formattedMessage;
1828 for (const auto& error : errors_) {
1829 formattedMessage +=
1830 "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
1831 formattedMessage += " " + error.message_ + "\n";
1832 if (error.extra_)
1833 formattedMessage +=
1834 "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
1835 }
1836 return formattedMessage;
1837 }
1838
getStructuredErrors() const1839 std::vector<OurReader::StructuredError> OurReader::getStructuredErrors() const {
1840 std::vector<OurReader::StructuredError> allErrors;
1841 for (const auto& error : errors_) {
1842 OurReader::StructuredError structured;
1843 structured.offset_start = error.token_.start_ - begin_;
1844 structured.offset_limit = error.token_.end_ - begin_;
1845 structured.message = error.message_;
1846 allErrors.push_back(structured);
1847 }
1848 return allErrors;
1849 }
1850
pushError(const Value & value,const String & message)1851 bool OurReader::pushError(const Value& value, const String& message) {
1852 ptrdiff_t length = end_ - begin_;
1853 if (value.getOffsetStart() > length || value.getOffsetLimit() > length)
1854 return false;
1855 Token token;
1856 token.type_ = tokenError;
1857 token.start_ = begin_ + value.getOffsetStart();
1858 token.end_ = begin_ + value.getOffsetLimit();
1859 ErrorInfo info;
1860 info.token_ = token;
1861 info.message_ = message;
1862 info.extra_ = nullptr;
1863 errors_.push_back(info);
1864 return true;
1865 }
1866
pushError(const Value & value,const String & message,const Value & extra)1867 bool OurReader::pushError(const Value& value,
1868 const String& message,
1869 const Value& extra) {
1870 ptrdiff_t length = end_ - begin_;
1871 if (value.getOffsetStart() > length || value.getOffsetLimit() > length ||
1872 extra.getOffsetLimit() > length)
1873 return false;
1874 Token token;
1875 token.type_ = tokenError;
1876 token.start_ = begin_ + value.getOffsetStart();
1877 token.end_ = begin_ + value.getOffsetLimit();
1878 ErrorInfo info;
1879 info.token_ = token;
1880 info.message_ = message;
1881 info.extra_ = begin_ + extra.getOffsetStart();
1882 errors_.push_back(info);
1883 return true;
1884 }
1885
good() const1886 bool OurReader::good() const { return errors_.empty(); }
1887
1888 class OurCharReader : public CharReader {
1889 bool const collectComments_;
1890 OurReader reader_;
1891
1892 public:
OurCharReader(bool collectComments,OurFeatures const & features)1893 OurCharReader(bool collectComments, OurFeatures const& features)
1894 : collectComments_(collectComments), reader_(features) {}
parse(char const * beginDoc,char const * endDoc,Value * root,String * errs)1895 bool parse(char const* beginDoc,
1896 char const* endDoc,
1897 Value* root,
1898 String* errs) override {
1899 bool ok = reader_.parse(beginDoc, endDoc, *root, collectComments_);
1900 if (errs) {
1901 *errs = reader_.getFormattedErrorMessages();
1902 }
1903 return ok;
1904 }
1905 };
1906
CharReaderBuilder()1907 CharReaderBuilder::CharReaderBuilder() { setDefaults(&settings_); }
1908 CharReaderBuilder::~CharReaderBuilder() = default;
newCharReader() const1909 CharReader* CharReaderBuilder::newCharReader() const {
1910 bool collectComments = settings_["collectComments"].asBool();
1911 OurFeatures features = OurFeatures::all();
1912 features.allowComments_ = settings_["allowComments"].asBool();
1913 features.strictRoot_ = settings_["strictRoot"].asBool();
1914 features.allowDroppedNullPlaceholders_ =
1915 settings_["allowDroppedNullPlaceholders"].asBool();
1916 features.allowNumericKeys_ = settings_["allowNumericKeys"].asBool();
1917 features.allowSingleQuotes_ = settings_["allowSingleQuotes"].asBool();
1918
1919 // Stack limit is always a size_t, so we get this as an unsigned int
1920 // regardless of it we have 64-bit integer support enabled.
1921 features.stackLimit_ = static_cast<size_t>(settings_["stackLimit"].asUInt());
1922 features.failIfExtra_ = settings_["failIfExtra"].asBool();
1923 features.rejectDupKeys_ = settings_["rejectDupKeys"].asBool();
1924 features.allowSpecialFloats_ = settings_["allowSpecialFloats"].asBool();
1925 return new OurCharReader(collectComments, features);
1926 }
getValidReaderKeys(std::set<String> * valid_keys)1927 static void getValidReaderKeys(std::set<String>* valid_keys) {
1928 valid_keys->clear();
1929 valid_keys->insert("collectComments");
1930 valid_keys->insert("allowComments");
1931 valid_keys->insert("strictRoot");
1932 valid_keys->insert("allowDroppedNullPlaceholders");
1933 valid_keys->insert("allowNumericKeys");
1934 valid_keys->insert("allowSingleQuotes");
1935 valid_keys->insert("stackLimit");
1936 valid_keys->insert("failIfExtra");
1937 valid_keys->insert("rejectDupKeys");
1938 valid_keys->insert("allowSpecialFloats");
1939 }
validate(Json::Value * invalid) const1940 bool CharReaderBuilder::validate(Json::Value* invalid) const {
1941 Json::Value my_invalid;
1942 if (!invalid)
1943 invalid = &my_invalid; // so we do not need to test for NULL
1944 Json::Value& inv = *invalid;
1945 std::set<String> valid_keys;
1946 getValidReaderKeys(&valid_keys);
1947 Value::Members keys = settings_.getMemberNames();
1948 size_t n = keys.size();
1949 for (size_t i = 0; i < n; ++i) {
1950 String const& key = keys[i];
1951 if (valid_keys.find(key) == valid_keys.end()) {
1952 inv[key] = settings_[key];
1953 }
1954 }
1955 return inv.empty();
1956 }
operator [](const String & key)1957 Value& CharReaderBuilder::operator[](const String& key) {
1958 return settings_[key];
1959 }
1960 // static
strictMode(Json::Value * settings)1961 void CharReaderBuilder::strictMode(Json::Value* settings) {
1962 //! [CharReaderBuilderStrictMode]
1963 (*settings)["allowComments"] = false;
1964 (*settings)["strictRoot"] = true;
1965 (*settings)["allowDroppedNullPlaceholders"] = false;
1966 (*settings)["allowNumericKeys"] = false;
1967 (*settings)["allowSingleQuotes"] = false;
1968 (*settings)["stackLimit"] = 1000;
1969 (*settings)["failIfExtra"] = true;
1970 (*settings)["rejectDupKeys"] = true;
1971 (*settings)["allowSpecialFloats"] = false;
1972 //! [CharReaderBuilderStrictMode]
1973 }
1974 // static
setDefaults(Json::Value * settings)1975 void CharReaderBuilder::setDefaults(Json::Value* settings) {
1976 //! [CharReaderBuilderDefaults]
1977 (*settings)["collectComments"] = true;
1978 (*settings)["allowComments"] = true;
1979 (*settings)["strictRoot"] = false;
1980 (*settings)["allowDroppedNullPlaceholders"] = false;
1981 (*settings)["allowNumericKeys"] = false;
1982 (*settings)["allowSingleQuotes"] = false;
1983 (*settings)["stackLimit"] = 1000;
1984 (*settings)["failIfExtra"] = false;
1985 (*settings)["rejectDupKeys"] = false;
1986 (*settings)["allowSpecialFloats"] = false;
1987 //! [CharReaderBuilderDefaults]
1988 }
1989
1990 //////////////////////////////////
1991 // global functions
1992
parseFromStream(CharReader::Factory const & fact,IStream & sin,Value * root,String * errs)1993 bool parseFromStream(CharReader::Factory const& fact,
1994 IStream& sin,
1995 Value* root,
1996 String* errs) {
1997 OStringStream ssin;
1998 ssin << sin.rdbuf();
1999 String doc = ssin.str();
2000 char const* begin = doc.data();
2001 char const* end = begin + doc.size();
2002 // Note that we do not actually need a null-terminator.
2003 CharReaderPtr const reader(fact.newCharReader());
2004 return reader->parse(begin, end, root, errs);
2005 }
2006
operator >>(IStream & sin,Value & root)2007 IStream& operator>>(IStream& sin, Value& root) {
2008 CharReaderBuilder b;
2009 String errs;
2010 bool ok = parseFromStream(b, sin, &root, &errs);
2011 if (!ok) {
2012 throwRuntimeError(errs);
2013 }
2014 return sin;
2015 }
2016
2017 } // namespace Json
2018