1 // Copyright 2007-2011 Baptiste Lepilleur and The JsonCpp Authors
2 // Copyright (C) 2016 InfoTeCS JSC. All rights reserved.
3 // Distributed under MIT license, or public domain if desired and
4 // recognized in your jurisdiction.
5 // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE
6
7 #if !defined(JSON_IS_AMALGAMATION)
8 #include "json_tool.h"
9 #include <json/assertions.h>
10 #include <json/reader.h>
11 #include <json/value.h>
12 #endif // if !defined(JSON_IS_AMALGAMATION)
#include <algorithm>
#include <cassert>
#include <cstring>
#include <iostream>
#include <istream>
#include <iterator>
#include <limits>
#include <memory>
#include <set>
#include <sstream>
#include <utility>
23
24 #include <cstdio>
25 #if __cplusplus >= 201103L
26
27 #if !defined(sscanf)
28 #define sscanf std::sscanf
29 #endif
30
31 #endif //__cplusplus
32
33 #if defined(_MSC_VER)
34 #if !defined(_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES)
35 #define _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES 1
36 #endif //_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES
37 #endif //_MSC_VER
38
39 #if defined(_MSC_VER)
40 // Disable warning about strdup being deprecated.
41 #pragma warning(disable : 4996)
42 #endif
43
// Nesting limit used by the deprecated Reader. Override it by defining
// JSONCPP_DEPRECATED_STACK_LIMIT as an integer at compile time.
#ifndef JSONCPP_DEPRECATED_STACK_LIMIT
#define JSONCPP_DEPRECATED_STACK_LIMIT 1000
#endif

// Compared against the node-stack depth in Reader::readValue().
static const size_t stackLimit_g = JSONCPP_DEPRECATED_STACK_LIMIT;
52
53 namespace Json {
54
55 #if __cplusplus >= 201103L || (defined(_CPPLIB_VER) && _CPPLIB_VER >= 520)
56 using CharReaderPtr = std::unique_ptr<CharReader>;
57 #else
58 using CharReaderPtr = std::auto_ptr<CharReader>;
59 #endif
60
61 // Implementation of class Features
62 // ////////////////////////////////
63
64 Features::Features() = default;
65
all()66 Features Features::all() { return {}; }
67
strictMode()68 Features Features::strictMode() {
69 Features features;
70 features.allowComments_ = false;
71 features.strictRoot_ = true;
72 features.allowDroppedNullPlaceholders_ = false;
73 features.allowNumericKeys_ = false;
74 return features;
75 }
76
77 // Implementation of class Reader
78 // ////////////////////////////////
79
containsNewLine(Reader::Location begin,Reader::Location end)80 bool Reader::containsNewLine(Reader::Location begin, Reader::Location end) {
81 return std::any_of(begin, end, [](char b) { return b == '\n' || b == '\r'; });
82 }
83
84 // Class Reader
85 // //////////////////////////////////////////////////////////////////
86
Reader()87 Reader::Reader() : features_(Features::all()) {}
88
Reader(const Features & features)89 Reader::Reader(const Features& features) : features_(features) {}
90
parse(const std::string & document,Value & root,bool collectComments)91 bool Reader::parse(const std::string& document, Value& root,
92 bool collectComments) {
93 document_.assign(document.begin(), document.end());
94 const char* begin = document_.c_str();
95 const char* end = begin + document_.length();
96 return parse(begin, end, root, collectComments);
97 }
98
parse(std::istream & is,Value & root,bool collectComments)99 bool Reader::parse(std::istream& is, Value& root, bool collectComments) {
100 // std::istream_iterator<char> begin(is);
101 // std::istream_iterator<char> end;
102 // Those would allow streamed input from a file, if parse() were a
103 // template function.
104
105 // Since String is reference-counted, this at least does not
106 // create an extra copy.
107 String doc;
108 std::getline(is, doc, static_cast<char> EOF);
109 return parse(doc.data(), doc.data() + doc.size(), root, collectComments);
110 }
111
parse(const char * beginDoc,const char * endDoc,Value & root,bool collectComments)112 bool Reader::parse(const char* beginDoc, const char* endDoc, Value& root,
113 bool collectComments) {
114 if (!features_.allowComments_) {
115 collectComments = false;
116 }
117
118 begin_ = beginDoc;
119 end_ = endDoc;
120 collectComments_ = collectComments;
121 current_ = begin_;
122 lastValueEnd_ = nullptr;
123 lastValue_ = nullptr;
124 commentsBefore_.clear();
125 errors_.clear();
126 while (!nodes_.empty())
127 nodes_.pop();
128 nodes_.push(&root);
129
130 bool successful = readValue();
131 Token token;
132 skipCommentTokens(token);
133 if (collectComments_ && !commentsBefore_.empty())
134 root.setComment(commentsBefore_, commentAfter);
135 if (features_.strictRoot_) {
136 if (!root.isArray() && !root.isObject()) {
137 // Set error location to start of doc, ideally should be first token found
138 // in doc
139 token.type_ = tokenError;
140 token.start_ = beginDoc;
141 token.end_ = endDoc;
142 addError(
143 "A valid JSON document must be either an array or an object value.",
144 token);
145 return false;
146 }
147 }
148 return successful;
149 }
150
readValue()151 bool Reader::readValue() {
152 // readValue() may call itself only if it calls readObject() or ReadArray().
153 // These methods execute nodes_.push() just before and nodes_.pop)() just
154 // after calling readValue(). parse() executes one nodes_.push(), so > instead
155 // of >=.
156 if (nodes_.size() > stackLimit_g)
157 throwRuntimeError("Exceeded stackLimit in readValue().");
158
159 Token token;
160 skipCommentTokens(token);
161 bool successful = true;
162
163 if (collectComments_ && !commentsBefore_.empty()) {
164 currentValue().setComment(commentsBefore_, commentBefore);
165 commentsBefore_.clear();
166 }
167
168 switch (token.type_) {
169 case tokenObjectBegin:
170 successful = readObject(token);
171 currentValue().setOffsetLimit(current_ - begin_);
172 break;
173 case tokenArrayBegin:
174 successful = readArray(token);
175 currentValue().setOffsetLimit(current_ - begin_);
176 break;
177 case tokenNumber:
178 successful = decodeNumber(token);
179 break;
180 case tokenString:
181 successful = decodeString(token);
182 break;
183 case tokenTrue: {
184 Value v(true);
185 currentValue().swapPayload(v);
186 currentValue().setOffsetStart(token.start_ - begin_);
187 currentValue().setOffsetLimit(token.end_ - begin_);
188 } break;
189 case tokenFalse: {
190 Value v(false);
191 currentValue().swapPayload(v);
192 currentValue().setOffsetStart(token.start_ - begin_);
193 currentValue().setOffsetLimit(token.end_ - begin_);
194 } break;
195 case tokenNull: {
196 Value v;
197 currentValue().swapPayload(v);
198 currentValue().setOffsetStart(token.start_ - begin_);
199 currentValue().setOffsetLimit(token.end_ - begin_);
200 } break;
201 case tokenArraySeparator:
202 case tokenObjectEnd:
203 case tokenArrayEnd:
204 if (features_.allowDroppedNullPlaceholders_) {
205 // "Un-read" the current token and mark the current value as a null
206 // token.
207 current_--;
208 Value v;
209 currentValue().swapPayload(v);
210 currentValue().setOffsetStart(current_ - begin_ - 1);
211 currentValue().setOffsetLimit(current_ - begin_);
212 break;
213 } // Else, fall through...
214 default:
215 currentValue().setOffsetStart(token.start_ - begin_);
216 currentValue().setOffsetLimit(token.end_ - begin_);
217 return addError("Syntax error: value, object or array expected.", token);
218 }
219
220 if (collectComments_) {
221 lastValueEnd_ = current_;
222 lastValue_ = ¤tValue();
223 }
224
225 return successful;
226 }
227
skipCommentTokens(Token & token)228 void Reader::skipCommentTokens(Token& token) {
229 if (features_.allowComments_) {
230 do {
231 readToken(token);
232 } while (token.type_ == tokenComment);
233 } else {
234 readToken(token);
235 }
236 }
237
readToken(Token & token)238 bool Reader::readToken(Token& token) {
239 skipSpaces();
240 token.start_ = current_;
241 Char c = getNextChar();
242 bool ok = true;
243 switch (c) {
244 case '{':
245 token.type_ = tokenObjectBegin;
246 break;
247 case '}':
248 token.type_ = tokenObjectEnd;
249 break;
250 case '[':
251 token.type_ = tokenArrayBegin;
252 break;
253 case ']':
254 token.type_ = tokenArrayEnd;
255 break;
256 case '"':
257 token.type_ = tokenString;
258 ok = readString();
259 break;
260 case '/':
261 token.type_ = tokenComment;
262 ok = readComment();
263 break;
264 case '0':
265 case '1':
266 case '2':
267 case '3':
268 case '4':
269 case '5':
270 case '6':
271 case '7':
272 case '8':
273 case '9':
274 case '-':
275 token.type_ = tokenNumber;
276 readNumber();
277 break;
278 case 't':
279 token.type_ = tokenTrue;
280 ok = match("rue", 3);
281 break;
282 case 'f':
283 token.type_ = tokenFalse;
284 ok = match("alse", 4);
285 break;
286 case 'n':
287 token.type_ = tokenNull;
288 ok = match("ull", 3);
289 break;
290 case ',':
291 token.type_ = tokenArraySeparator;
292 break;
293 case ':':
294 token.type_ = tokenMemberSeparator;
295 break;
296 case 0:
297 token.type_ = tokenEndOfStream;
298 break;
299 default:
300 ok = false;
301 break;
302 }
303 if (!ok)
304 token.type_ = tokenError;
305 token.end_ = current_;
306 return ok;
307 }
308
skipSpaces()309 void Reader::skipSpaces() {
310 while (current_ != end_) {
311 Char c = *current_;
312 if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
313 ++current_;
314 else
315 break;
316 }
317 }
318
match(const Char * pattern,int patternLength)319 bool Reader::match(const Char* pattern, int patternLength) {
320 if (end_ - current_ < patternLength)
321 return false;
322 int index = patternLength;
323 while (index--)
324 if (current_[index] != pattern[index])
325 return false;
326 current_ += patternLength;
327 return true;
328 }
329
readComment()330 bool Reader::readComment() {
331 Location commentBegin = current_ - 1;
332 Char c = getNextChar();
333 bool successful = false;
334 if (c == '*')
335 successful = readCStyleComment();
336 else if (c == '/')
337 successful = readCppStyleComment();
338 if (!successful)
339 return false;
340
341 if (collectComments_) {
342 CommentPlacement placement = commentBefore;
343 if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
344 if (c != '*' || !containsNewLine(commentBegin, current_))
345 placement = commentAfterOnSameLine;
346 }
347
348 addComment(commentBegin, current_, placement);
349 }
350 return true;
351 }
352
normalizeEOL(Reader::Location begin,Reader::Location end)353 String Reader::normalizeEOL(Reader::Location begin, Reader::Location end) {
354 String normalized;
355 normalized.reserve(static_cast<size_t>(end - begin));
356 Reader::Location current = begin;
357 while (current != end) {
358 char c = *current++;
359 if (c == '\r') {
360 if (current != end && *current == '\n')
361 // convert dos EOL
362 ++current;
363 // convert Mac EOL
364 normalized += '\n';
365 } else {
366 normalized += c;
367 }
368 }
369 return normalized;
370 }
371
addComment(Location begin,Location end,CommentPlacement placement)372 void Reader::addComment(Location begin, Location end,
373 CommentPlacement placement) {
374 assert(collectComments_);
375 const String& normalized = normalizeEOL(begin, end);
376 if (placement == commentAfterOnSameLine) {
377 assert(lastValue_ != nullptr);
378 lastValue_->setComment(normalized, placement);
379 } else {
380 commentsBefore_ += normalized;
381 }
382 }
383
readCStyleComment()384 bool Reader::readCStyleComment() {
385 while ((current_ + 1) < end_) {
386 Char c = getNextChar();
387 if (c == '*' && *current_ == '/')
388 break;
389 }
390 return getNextChar() == '/';
391 }
392
readCppStyleComment()393 bool Reader::readCppStyleComment() {
394 while (current_ != end_) {
395 Char c = getNextChar();
396 if (c == '\n')
397 break;
398 if (c == '\r') {
399 // Consume DOS EOL. It will be normalized in addComment.
400 if (current_ != end_ && *current_ == '\n')
401 getNextChar();
402 // Break on Moc OS 9 EOL.
403 break;
404 }
405 }
406 return true;
407 }
408
readNumber()409 void Reader::readNumber() {
410 Location p = current_;
411 char c = '0'; // stopgap for already consumed character
412 // integral part
413 while (c >= '0' && c <= '9')
414 c = (current_ = p) < end_ ? *p++ : '\0';
415 // fractional part
416 if (c == '.') {
417 c = (current_ = p) < end_ ? *p++ : '\0';
418 while (c >= '0' && c <= '9')
419 c = (current_ = p) < end_ ? *p++ : '\0';
420 }
421 // exponential part
422 if (c == 'e' || c == 'E') {
423 c = (current_ = p) < end_ ? *p++ : '\0';
424 if (c == '+' || c == '-')
425 c = (current_ = p) < end_ ? *p++ : '\0';
426 while (c >= '0' && c <= '9')
427 c = (current_ = p) < end_ ? *p++ : '\0';
428 }
429 }
430
readString()431 bool Reader::readString() {
432 Char c = '\0';
433 while (current_ != end_) {
434 c = getNextChar();
435 if (c == '\\')
436 getNextChar();
437 else if (c == '"')
438 break;
439 }
440 return c == '"';
441 }
442
readObject(Token & token)443 bool Reader::readObject(Token& token) {
444 Token tokenName;
445 String name;
446 Value init(objectValue);
447 currentValue().swapPayload(init);
448 currentValue().setOffsetStart(token.start_ - begin_);
449 while (readToken(tokenName)) {
450 bool initialTokenOk = true;
451 while (tokenName.type_ == tokenComment && initialTokenOk)
452 initialTokenOk = readToken(tokenName);
453 if (!initialTokenOk)
454 break;
455 if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object
456 return true;
457 name.clear();
458 if (tokenName.type_ == tokenString) {
459 if (!decodeString(tokenName, name))
460 return recoverFromError(tokenObjectEnd);
461 } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
462 Value numberName;
463 if (!decodeNumber(tokenName, numberName))
464 return recoverFromError(tokenObjectEnd);
465 name = numberName.asString();
466 } else {
467 break;
468 }
469
470 Token colon;
471 if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
472 return addErrorAndRecover("Missing ':' after object member name", colon,
473 tokenObjectEnd);
474 }
475 Value& value = currentValue()[name];
476 nodes_.push(&value);
477 bool ok = readValue();
478 nodes_.pop();
479 if (!ok) // error already set
480 return recoverFromError(tokenObjectEnd);
481
482 Token comma;
483 if (!readToken(comma) ||
484 (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
485 comma.type_ != tokenComment)) {
486 return addErrorAndRecover("Missing ',' or '}' in object declaration",
487 comma, tokenObjectEnd);
488 }
489 bool finalizeTokenOk = true;
490 while (comma.type_ == tokenComment && finalizeTokenOk)
491 finalizeTokenOk = readToken(comma);
492 if (comma.type_ == tokenObjectEnd)
493 return true;
494 }
495 return addErrorAndRecover("Missing '}' or object member name", tokenName,
496 tokenObjectEnd);
497 }
498
readArray(Token & token)499 bool Reader::readArray(Token& token) {
500 Value init(arrayValue);
501 currentValue().swapPayload(init);
502 currentValue().setOffsetStart(token.start_ - begin_);
503 skipSpaces();
504 if (current_ != end_ && *current_ == ']') // empty array
505 {
506 Token endArray;
507 readToken(endArray);
508 return true;
509 }
510 int index = 0;
511 for (;;) {
512 Value& value = currentValue()[index++];
513 nodes_.push(&value);
514 bool ok = readValue();
515 nodes_.pop();
516 if (!ok) // error already set
517 return recoverFromError(tokenArrayEnd);
518
519 Token currentToken;
520 // Accept Comment after last item in the array.
521 ok = readToken(currentToken);
522 while (currentToken.type_ == tokenComment && ok) {
523 ok = readToken(currentToken);
524 }
525 bool badTokenType = (currentToken.type_ != tokenArraySeparator &&
526 currentToken.type_ != tokenArrayEnd);
527 if (!ok || badTokenType) {
528 return addErrorAndRecover("Missing ',' or ']' in array declaration",
529 currentToken, tokenArrayEnd);
530 }
531 if (currentToken.type_ == tokenArrayEnd)
532 break;
533 }
534 return true;
535 }
536
decodeNumber(Token & token)537 bool Reader::decodeNumber(Token& token) {
538 Value decoded;
539 if (!decodeNumber(token, decoded))
540 return false;
541 currentValue().swapPayload(decoded);
542 currentValue().setOffsetStart(token.start_ - begin_);
543 currentValue().setOffsetLimit(token.end_ - begin_);
544 return true;
545 }
546
decodeNumber(Token & token,Value & decoded)547 bool Reader::decodeNumber(Token& token, Value& decoded) {
548 // Attempts to parse the number as an integer. If the number is
549 // larger than the maximum supported value of an integer then
550 // we decode the number as a double.
551 Location current = token.start_;
552 bool isNegative = *current == '-';
553 if (isNegative)
554 ++current;
555 // TODO: Help the compiler do the div and mod at compile time or get rid of
556 // them.
557 Value::LargestUInt maxIntegerValue =
558 isNegative ? Value::LargestUInt(Value::maxLargestInt) + 1
559 : Value::maxLargestUInt;
560 Value::LargestUInt threshold = maxIntegerValue / 10;
561 Value::LargestUInt value = 0;
562 while (current < token.end_) {
563 Char c = *current++;
564 if (c < '0' || c > '9')
565 return decodeDouble(token, decoded);
566 auto digit(static_cast<Value::UInt>(c - '0'));
567 if (value >= threshold) {
568 // We've hit or exceeded the max value divided by 10 (rounded down). If
569 // a) we've only just touched the limit, b) this is the last digit, and
570 // c) it's small enough to fit in that rounding delta, we're okay.
571 // Otherwise treat this number as a double to avoid overflow.
572 if (value > threshold || current != token.end_ ||
573 digit > maxIntegerValue % 10) {
574 return decodeDouble(token, decoded);
575 }
576 }
577 value = value * 10 + digit;
578 }
579 if (isNegative && value == maxIntegerValue)
580 decoded = Value::minLargestInt;
581 else if (isNegative)
582 decoded = -Value::LargestInt(value);
583 else if (value <= Value::LargestUInt(Value::maxInt))
584 decoded = Value::LargestInt(value);
585 else
586 decoded = value;
587 return true;
588 }
589
decodeDouble(Token & token)590 bool Reader::decodeDouble(Token& token) {
591 Value decoded;
592 if (!decodeDouble(token, decoded))
593 return false;
594 currentValue().swapPayload(decoded);
595 currentValue().setOffsetStart(token.start_ - begin_);
596 currentValue().setOffsetLimit(token.end_ - begin_);
597 return true;
598 }
599
decodeDouble(Token & token,Value & decoded)600 bool Reader::decodeDouble(Token& token, Value& decoded) {
601 double value = 0;
602 String buffer(token.start_, token.end_);
603 IStringStream is(buffer);
604 if (!(is >> value))
605 return addError(
606 "'" + String(token.start_, token.end_) + "' is not a number.", token);
607 decoded = value;
608 return true;
609 }
610
decodeString(Token & token)611 bool Reader::decodeString(Token& token) {
612 String decoded_string;
613 if (!decodeString(token, decoded_string))
614 return false;
615 Value decoded(decoded_string);
616 currentValue().swapPayload(decoded);
617 currentValue().setOffsetStart(token.start_ - begin_);
618 currentValue().setOffsetLimit(token.end_ - begin_);
619 return true;
620 }
621
decodeString(Token & token,String & decoded)622 bool Reader::decodeString(Token& token, String& decoded) {
623 decoded.reserve(static_cast<size_t>(token.end_ - token.start_ - 2));
624 Location current = token.start_ + 1; // skip '"'
625 Location end = token.end_ - 1; // do not include '"'
626 while (current != end) {
627 Char c = *current++;
628 if (c == '"')
629 break;
630 if (c == '\\') {
631 if (current == end)
632 return addError("Empty escape sequence in string", token, current);
633 Char escape = *current++;
634 switch (escape) {
635 case '"':
636 decoded += '"';
637 break;
638 case '/':
639 decoded += '/';
640 break;
641 case '\\':
642 decoded += '\\';
643 break;
644 case 'b':
645 decoded += '\b';
646 break;
647 case 'f':
648 decoded += '\f';
649 break;
650 case 'n':
651 decoded += '\n';
652 break;
653 case 'r':
654 decoded += '\r';
655 break;
656 case 't':
657 decoded += '\t';
658 break;
659 case 'u': {
660 unsigned int unicode;
661 if (!decodeUnicodeCodePoint(token, current, end, unicode))
662 return false;
663 decoded += codePointToUTF8(unicode);
664 } break;
665 default:
666 return addError("Bad escape sequence in string", token, current);
667 }
668 } else {
669 decoded += c;
670 }
671 }
672 return true;
673 }
674
decodeUnicodeCodePoint(Token & token,Location & current,Location end,unsigned int & unicode)675 bool Reader::decodeUnicodeCodePoint(Token& token, Location& current,
676 Location end, unsigned int& unicode) {
677
678 if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
679 return false;
680 if (unicode >= 0xD800 && unicode <= 0xDBFF) {
681 // surrogate pairs
682 if (end - current < 6)
683 return addError(
684 "additional six characters expected to parse unicode surrogate pair.",
685 token, current);
686 if (*(current++) == '\\' && *(current++) == 'u') {
687 unsigned int surrogatePair;
688 if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
689 unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
690 } else
691 return false;
692 } else
693 return addError("expecting another \\u token to begin the second half of "
694 "a unicode surrogate pair",
695 token, current);
696 }
697 return true;
698 }
699
decodeUnicodeEscapeSequence(Token & token,Location & current,Location end,unsigned int & ret_unicode)700 bool Reader::decodeUnicodeEscapeSequence(Token& token, Location& current,
701 Location end,
702 unsigned int& ret_unicode) {
703 if (end - current < 4)
704 return addError(
705 "Bad unicode escape sequence in string: four digits expected.", token,
706 current);
707 int unicode = 0;
708 for (int index = 0; index < 4; ++index) {
709 Char c = *current++;
710 unicode *= 16;
711 if (c >= '0' && c <= '9')
712 unicode += c - '0';
713 else if (c >= 'a' && c <= 'f')
714 unicode += c - 'a' + 10;
715 else if (c >= 'A' && c <= 'F')
716 unicode += c - 'A' + 10;
717 else
718 return addError(
719 "Bad unicode escape sequence in string: hexadecimal digit expected.",
720 token, current);
721 }
722 ret_unicode = static_cast<unsigned int>(unicode);
723 return true;
724 }
725
addError(const String & message,Token & token,Location extra)726 bool Reader::addError(const String& message, Token& token, Location extra) {
727 ErrorInfo info;
728 info.token_ = token;
729 info.message_ = message;
730 info.extra_ = extra;
731 errors_.push_back(info);
732 return false;
733 }
734
recoverFromError(TokenType skipUntilToken)735 bool Reader::recoverFromError(TokenType skipUntilToken) {
736 size_t const errorCount = errors_.size();
737 Token skip;
738 for (;;) {
739 if (!readToken(skip))
740 errors_.resize(errorCount); // discard errors caused by recovery
741 if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
742 break;
743 }
744 errors_.resize(errorCount);
745 return false;
746 }
747
addErrorAndRecover(const String & message,Token & token,TokenType skipUntilToken)748 bool Reader::addErrorAndRecover(const String& message, Token& token,
749 TokenType skipUntilToken) {
750 addError(message, token);
751 return recoverFromError(skipUntilToken);
752 }
753
currentValue()754 Value& Reader::currentValue() { return *(nodes_.top()); }
755
getNextChar()756 Reader::Char Reader::getNextChar() {
757 if (current_ == end_)
758 return 0;
759 return *current_++;
760 }
761
getLocationLineAndColumn(Location location,int & line,int & column) const762 void Reader::getLocationLineAndColumn(Location location, int& line,
763 int& column) const {
764 Location current = begin_;
765 Location lastLineStart = current;
766 line = 0;
767 while (current < location && current != end_) {
768 Char c = *current++;
769 if (c == '\r') {
770 if (*current == '\n')
771 ++current;
772 lastLineStart = current;
773 ++line;
774 } else if (c == '\n') {
775 lastLineStart = current;
776 ++line;
777 }
778 }
779 // column & line start at 1
780 column = int(location - lastLineStart) + 1;
781 ++line;
782 }
783
getLocationLineAndColumn(Location location) const784 String Reader::getLocationLineAndColumn(Location location) const {
785 int line, column;
786 getLocationLineAndColumn(location, line, column);
787 char buffer[18 + 16 + 16 + 1];
788 jsoncpp_snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
789 return buffer;
790 }
791
792 // Deprecated. Preserved for backward compatibility
getFormatedErrorMessages() const793 String Reader::getFormatedErrorMessages() const {
794 return getFormattedErrorMessages();
795 }
796
getFormattedErrorMessages() const797 String Reader::getFormattedErrorMessages() const {
798 String formattedMessage;
799 for (const auto& error : errors_) {
800 formattedMessage +=
801 "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
802 formattedMessage += " " + error.message_ + "\n";
803 if (error.extra_)
804 formattedMessage +=
805 "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
806 }
807 return formattedMessage;
808 }
809
getStructuredErrors() const810 std::vector<Reader::StructuredError> Reader::getStructuredErrors() const {
811 std::vector<Reader::StructuredError> allErrors;
812 for (const auto& error : errors_) {
813 Reader::StructuredError structured;
814 structured.offset_start = error.token_.start_ - begin_;
815 structured.offset_limit = error.token_.end_ - begin_;
816 structured.message = error.message_;
817 allErrors.push_back(structured);
818 }
819 return allErrors;
820 }
821
pushError(const Value & value,const String & message)822 bool Reader::pushError(const Value& value, const String& message) {
823 ptrdiff_t const length = end_ - begin_;
824 if (value.getOffsetStart() > length || value.getOffsetLimit() > length)
825 return false;
826 Token token;
827 token.type_ = tokenError;
828 token.start_ = begin_ + value.getOffsetStart();
829 token.end_ = begin_ + value.getOffsetLimit();
830 ErrorInfo info;
831 info.token_ = token;
832 info.message_ = message;
833 info.extra_ = nullptr;
834 errors_.push_back(info);
835 return true;
836 }
837
pushError(const Value & value,const String & message,const Value & extra)838 bool Reader::pushError(const Value& value, const String& message,
839 const Value& extra) {
840 ptrdiff_t const length = end_ - begin_;
841 if (value.getOffsetStart() > length || value.getOffsetLimit() > length ||
842 extra.getOffsetLimit() > length)
843 return false;
844 Token token;
845 token.type_ = tokenError;
846 token.start_ = begin_ + value.getOffsetStart();
847 token.end_ = begin_ + value.getOffsetLimit();
848 ErrorInfo info;
849 info.token_ = token;
850 info.message_ = message;
851 info.extra_ = begin_ + extra.getOffsetStart();
852 errors_.push_back(info);
853 return true;
854 }
855
good() const856 bool Reader::good() const { return errors_.empty(); }
857
// Originally copied from the Features class (now deprecated), used internally
// for features implementation.
//
// Every flag has an in-class initializer, so a default-constructed object
// has well-defined contents: all flags off and a stack limit of 0.
class OurFeatures {
public:
  // Returns a value-initialized feature set.
  static OurFeatures all();
  bool allowComments_ = false;
  bool allowTrailingCommas_ = false;
  bool strictRoot_ = false;
  bool allowDroppedNullPlaceholders_ = false;
  bool allowNumericKeys_ = false;
  bool allowSingleQuotes_ = false;
  bool failIfExtra_ = false;
  bool rejectDupKeys_ = false;
  bool allowSpecialFloats_ = false;
  bool skipBom_ = false;
  size_t stackLimit_ = 0;
}; // OurFeatures
875
all()876 OurFeatures OurFeatures::all() { return {}; }
877
878 // Implementation of class Reader
879 // ////////////////////////////////
880
881 // Originally copied from the Reader class (now deprecated), used internally
882 // for implementing JSON reading.
883 class OurReader {
884 public:
885 using Char = char;
886 using Location = const Char*;
887 struct StructuredError {
888 ptrdiff_t offset_start;
889 ptrdiff_t offset_limit;
890 String message;
891 };
892
893 explicit OurReader(OurFeatures const& features);
894 bool parse(const char* beginDoc, const char* endDoc, Value& root,
895 bool collectComments = true);
896 String getFormattedErrorMessages() const;
897 std::vector<StructuredError> getStructuredErrors() const;
898
899 private:
900 OurReader(OurReader const&); // no impl
901 void operator=(OurReader const&); // no impl
902
903 enum TokenType {
904 tokenEndOfStream = 0,
905 tokenObjectBegin,
906 tokenObjectEnd,
907 tokenArrayBegin,
908 tokenArrayEnd,
909 tokenString,
910 tokenNumber,
911 tokenTrue,
912 tokenFalse,
913 tokenNull,
914 tokenNaN,
915 tokenPosInf,
916 tokenNegInf,
917 tokenArraySeparator,
918 tokenMemberSeparator,
919 tokenComment,
920 tokenError
921 };
922
923 class Token {
924 public:
925 TokenType type_;
926 Location start_;
927 Location end_;
928 };
929
930 class ErrorInfo {
931 public:
932 Token token_;
933 String message_;
934 Location extra_;
935 };
936
937 using Errors = std::deque<ErrorInfo>;
938
939 bool readToken(Token& token);
940 void skipSpaces();
941 void skipBom(bool skipBom);
942 bool match(const Char* pattern, int patternLength);
943 bool readComment();
944 bool readCStyleComment(bool* containsNewLineResult);
945 bool readCppStyleComment();
946 bool readString();
947 bool readStringSingleQuote();
948 bool readNumber(bool checkInf);
949 bool readValue();
950 bool readObject(Token& token);
951 bool readArray(Token& token);
952 bool decodeNumber(Token& token);
953 bool decodeNumber(Token& token, Value& decoded);
954 bool decodeString(Token& token);
955 bool decodeString(Token& token, String& decoded);
956 bool decodeDouble(Token& token);
957 bool decodeDouble(Token& token, Value& decoded);
958 bool decodeUnicodeCodePoint(Token& token, Location& current, Location end,
959 unsigned int& unicode);
960 bool decodeUnicodeEscapeSequence(Token& token, Location& current,
961 Location end, unsigned int& unicode);
962 bool addError(const String& message, Token& token, Location extra = nullptr);
963 bool recoverFromError(TokenType skipUntilToken);
964 bool addErrorAndRecover(const String& message, Token& token,
965 TokenType skipUntilToken);
966 void skipUntilSpace();
967 Value& currentValue();
968 Char getNextChar();
969 void getLocationLineAndColumn(Location location, int& line,
970 int& column) const;
971 String getLocationLineAndColumn(Location location) const;
972 void addComment(Location begin, Location end, CommentPlacement placement);
973 void skipCommentTokens(Token& token);
974
975 static String normalizeEOL(Location begin, Location end);
976 static bool containsNewLine(Location begin, Location end);
977
978 using Nodes = std::stack<Value*>;
979
980 Nodes nodes_{};
981 Errors errors_{};
982 String document_{};
983 Location begin_ = nullptr;
984 Location end_ = nullptr;
985 Location current_ = nullptr;
986 Location lastValueEnd_ = nullptr;
987 Value* lastValue_ = nullptr;
988 bool lastValueHasAComment_ = false;
989 String commentsBefore_{};
990
991 OurFeatures const features_;
992 bool collectComments_ = false;
993 }; // OurReader
994
// Complete copy of the Reader implementation, adapted for OurReader.
996
containsNewLine(OurReader::Location begin,OurReader::Location end)997 bool OurReader::containsNewLine(OurReader::Location begin,
998 OurReader::Location end) {
999 return std::any_of(begin, end, [](char b) { return b == '\n' || b == '\r'; });
1000 }
1001
OurReader(OurFeatures const & features)1002 OurReader::OurReader(OurFeatures const& features) : features_(features) {}
1003
parse(const char * beginDoc,const char * endDoc,Value & root,bool collectComments)1004 bool OurReader::parse(const char* beginDoc, const char* endDoc, Value& root,
1005 bool collectComments) {
1006 if (!features_.allowComments_) {
1007 collectComments = false;
1008 }
1009
1010 begin_ = beginDoc;
1011 end_ = endDoc;
1012 collectComments_ = collectComments;
1013 current_ = begin_;
1014 lastValueEnd_ = nullptr;
1015 lastValue_ = nullptr;
1016 commentsBefore_.clear();
1017 errors_.clear();
1018 while (!nodes_.empty())
1019 nodes_.pop();
1020 nodes_.push(&root);
1021
1022 // skip byte order mark if it exists at the beginning of the UTF-8 text.
1023 skipBom(features_.skipBom_);
1024 bool successful = readValue();
1025 nodes_.pop();
1026 Token token;
1027 skipCommentTokens(token);
1028 if (features_.failIfExtra_ && (token.type_ != tokenEndOfStream)) {
1029 addError("Extra non-whitespace after JSON value.", token);
1030 return false;
1031 }
1032 if (collectComments_ && !commentsBefore_.empty())
1033 root.setComment(commentsBefore_, commentAfter);
1034 if (features_.strictRoot_) {
1035 if (!root.isArray() && !root.isObject()) {
1036 // Set error location to start of doc, ideally should be first token found
1037 // in doc
1038 token.type_ = tokenError;
1039 token.start_ = beginDoc;
1040 token.end_ = endDoc;
1041 addError(
1042 "A valid JSON document must be either an array or an object value.",
1043 token);
1044 return false;
1045 }
1046 }
1047 return successful;
1048 }
1049
readValue()1050 bool OurReader::readValue() {
1051 // To preserve the old behaviour we cast size_t to int.
1052 if (nodes_.size() > features_.stackLimit_)
1053 throwRuntimeError("Exceeded stackLimit in readValue().");
1054 Token token;
1055 skipCommentTokens(token);
1056 bool successful = true;
1057
1058 if (collectComments_ && !commentsBefore_.empty()) {
1059 currentValue().setComment(commentsBefore_, commentBefore);
1060 commentsBefore_.clear();
1061 }
1062
1063 switch (token.type_) {
1064 case tokenObjectBegin:
1065 successful = readObject(token);
1066 currentValue().setOffsetLimit(current_ - begin_);
1067 break;
1068 case tokenArrayBegin:
1069 successful = readArray(token);
1070 currentValue().setOffsetLimit(current_ - begin_);
1071 break;
1072 case tokenNumber:
1073 successful = decodeNumber(token);
1074 break;
1075 case tokenString:
1076 successful = decodeString(token);
1077 break;
1078 case tokenTrue: {
1079 Value v(true);
1080 currentValue().swapPayload(v);
1081 currentValue().setOffsetStart(token.start_ - begin_);
1082 currentValue().setOffsetLimit(token.end_ - begin_);
1083 } break;
1084 case tokenFalse: {
1085 Value v(false);
1086 currentValue().swapPayload(v);
1087 currentValue().setOffsetStart(token.start_ - begin_);
1088 currentValue().setOffsetLimit(token.end_ - begin_);
1089 } break;
1090 case tokenNull: {
1091 Value v;
1092 currentValue().swapPayload(v);
1093 currentValue().setOffsetStart(token.start_ - begin_);
1094 currentValue().setOffsetLimit(token.end_ - begin_);
1095 } break;
1096 case tokenNaN: {
1097 Value v(std::numeric_limits<double>::quiet_NaN());
1098 currentValue().swapPayload(v);
1099 currentValue().setOffsetStart(token.start_ - begin_);
1100 currentValue().setOffsetLimit(token.end_ - begin_);
1101 } break;
1102 case tokenPosInf: {
1103 Value v(std::numeric_limits<double>::infinity());
1104 currentValue().swapPayload(v);
1105 currentValue().setOffsetStart(token.start_ - begin_);
1106 currentValue().setOffsetLimit(token.end_ - begin_);
1107 } break;
1108 case tokenNegInf: {
1109 Value v(-std::numeric_limits<double>::infinity());
1110 currentValue().swapPayload(v);
1111 currentValue().setOffsetStart(token.start_ - begin_);
1112 currentValue().setOffsetLimit(token.end_ - begin_);
1113 } break;
1114 case tokenArraySeparator:
1115 case tokenObjectEnd:
1116 case tokenArrayEnd:
1117 if (features_.allowDroppedNullPlaceholders_) {
1118 // "Un-read" the current token and mark the current value as a null
1119 // token.
1120 current_--;
1121 Value v;
1122 currentValue().swapPayload(v);
1123 currentValue().setOffsetStart(current_ - begin_ - 1);
1124 currentValue().setOffsetLimit(current_ - begin_);
1125 break;
1126 } // else, fall through ...
1127 default:
1128 currentValue().setOffsetStart(token.start_ - begin_);
1129 currentValue().setOffsetLimit(token.end_ - begin_);
1130 return addError("Syntax error: value, object or array expected.", token);
1131 }
1132
1133 if (collectComments_) {
1134 lastValueEnd_ = current_;
1135 lastValueHasAComment_ = false;
1136 lastValue_ = ¤tValue();
1137 }
1138
1139 return successful;
1140 }
1141
skipCommentTokens(Token & token)1142 void OurReader::skipCommentTokens(Token& token) {
1143 if (features_.allowComments_) {
1144 do {
1145 readToken(token);
1146 } while (token.type_ == tokenComment);
1147 } else {
1148 readToken(token);
1149 }
1150 }
1151
readToken(Token & token)1152 bool OurReader::readToken(Token& token) {
1153 skipSpaces();
1154 token.start_ = current_;
1155 Char c = getNextChar();
1156 bool ok = true;
1157 switch (c) {
1158 case '{':
1159 token.type_ = tokenObjectBegin;
1160 break;
1161 case '}':
1162 token.type_ = tokenObjectEnd;
1163 break;
1164 case '[':
1165 token.type_ = tokenArrayBegin;
1166 break;
1167 case ']':
1168 token.type_ = tokenArrayEnd;
1169 break;
1170 case '"':
1171 token.type_ = tokenString;
1172 ok = readString();
1173 break;
1174 case '\'':
1175 if (features_.allowSingleQuotes_) {
1176 token.type_ = tokenString;
1177 ok = readStringSingleQuote();
1178 } else {
1179 // If we don't allow single quotes, this is a failure case.
1180 ok = false;
1181 }
1182 break;
1183 case '/':
1184 token.type_ = tokenComment;
1185 ok = readComment();
1186 break;
1187 case '0':
1188 case '1':
1189 case '2':
1190 case '3':
1191 case '4':
1192 case '5':
1193 case '6':
1194 case '7':
1195 case '8':
1196 case '9':
1197 token.type_ = tokenNumber;
1198 readNumber(false);
1199 break;
1200 case '-':
1201 if (readNumber(true)) {
1202 token.type_ = tokenNumber;
1203 } else {
1204 token.type_ = tokenNegInf;
1205 ok = features_.allowSpecialFloats_ && match("nfinity", 7);
1206 }
1207 break;
1208 case '+':
1209 if (readNumber(true)) {
1210 token.type_ = tokenNumber;
1211 } else {
1212 token.type_ = tokenPosInf;
1213 ok = features_.allowSpecialFloats_ && match("nfinity", 7);
1214 }
1215 break;
1216 case 't':
1217 token.type_ = tokenTrue;
1218 ok = match("rue", 3);
1219 break;
1220 case 'f':
1221 token.type_ = tokenFalse;
1222 ok = match("alse", 4);
1223 break;
1224 case 'n':
1225 token.type_ = tokenNull;
1226 ok = match("ull", 3);
1227 break;
1228 case 'N':
1229 if (features_.allowSpecialFloats_) {
1230 token.type_ = tokenNaN;
1231 ok = match("aN", 2);
1232 } else {
1233 ok = false;
1234 }
1235 break;
1236 case 'I':
1237 if (features_.allowSpecialFloats_) {
1238 token.type_ = tokenPosInf;
1239 ok = match("nfinity", 7);
1240 } else {
1241 ok = false;
1242 }
1243 break;
1244 case ',':
1245 token.type_ = tokenArraySeparator;
1246 break;
1247 case ':':
1248 token.type_ = tokenMemberSeparator;
1249 break;
1250 case 0:
1251 token.type_ = tokenEndOfStream;
1252 break;
1253 default:
1254 ok = false;
1255 break;
1256 }
1257 if (!ok)
1258 token.type_ = tokenError;
1259 token.end_ = current_;
1260 return ok;
1261 }
1262
skipSpaces()1263 void OurReader::skipSpaces() {
1264 while (current_ != end_) {
1265 Char c = *current_;
1266 if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
1267 ++current_;
1268 else
1269 break;
1270 }
1271 }
1272
skipBom(bool skipBom)1273 void OurReader::skipBom(bool skipBom) {
1274 // The default behavior is to skip BOM.
1275 if (skipBom) {
1276 if ((end_ - begin_) >= 3 && strncmp(begin_, "\xEF\xBB\xBF", 3) == 0) {
1277 begin_ += 3;
1278 current_ = begin_;
1279 }
1280 }
1281 }
1282
match(const Char * pattern,int patternLength)1283 bool OurReader::match(const Char* pattern, int patternLength) {
1284 if (end_ - current_ < patternLength)
1285 return false;
1286 int index = patternLength;
1287 while (index--)
1288 if (current_[index] != pattern[index])
1289 return false;
1290 current_ += patternLength;
1291 return true;
1292 }
1293
readComment()1294 bool OurReader::readComment() {
1295 const Location commentBegin = current_ - 1;
1296 const Char c = getNextChar();
1297 bool successful = false;
1298 bool cStyleWithEmbeddedNewline = false;
1299
1300 const bool isCStyleComment = (c == '*');
1301 const bool isCppStyleComment = (c == '/');
1302 if (isCStyleComment) {
1303 successful = readCStyleComment(&cStyleWithEmbeddedNewline);
1304 } else if (isCppStyleComment) {
1305 successful = readCppStyleComment();
1306 }
1307
1308 if (!successful)
1309 return false;
1310
1311 if (collectComments_) {
1312 CommentPlacement placement = commentBefore;
1313
1314 if (!lastValueHasAComment_) {
1315 if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
1316 if (isCppStyleComment || !cStyleWithEmbeddedNewline) {
1317 placement = commentAfterOnSameLine;
1318 lastValueHasAComment_ = true;
1319 }
1320 }
1321 }
1322
1323 addComment(commentBegin, current_, placement);
1324 }
1325 return true;
1326 }
1327
normalizeEOL(OurReader::Location begin,OurReader::Location end)1328 String OurReader::normalizeEOL(OurReader::Location begin,
1329 OurReader::Location end) {
1330 String normalized;
1331 normalized.reserve(static_cast<size_t>(end - begin));
1332 OurReader::Location current = begin;
1333 while (current != end) {
1334 char c = *current++;
1335 if (c == '\r') {
1336 if (current != end && *current == '\n')
1337 // convert dos EOL
1338 ++current;
1339 // convert Mac EOL
1340 normalized += '\n';
1341 } else {
1342 normalized += c;
1343 }
1344 }
1345 return normalized;
1346 }
1347
addComment(Location begin,Location end,CommentPlacement placement)1348 void OurReader::addComment(Location begin, Location end,
1349 CommentPlacement placement) {
1350 assert(collectComments_);
1351 const String& normalized = normalizeEOL(begin, end);
1352 if (placement == commentAfterOnSameLine) {
1353 assert(lastValue_ != nullptr);
1354 lastValue_->setComment(normalized, placement);
1355 } else {
1356 commentsBefore_ += normalized;
1357 }
1358 }
1359
readCStyleComment(bool * containsNewLineResult)1360 bool OurReader::readCStyleComment(bool* containsNewLineResult) {
1361 *containsNewLineResult = false;
1362
1363 while ((current_ + 1) < end_) {
1364 Char c = getNextChar();
1365 if (c == '*' && *current_ == '/')
1366 break;
1367 if (c == '\n')
1368 *containsNewLineResult = true;
1369 }
1370
1371 return getNextChar() == '/';
1372 }
1373
readCppStyleComment()1374 bool OurReader::readCppStyleComment() {
1375 while (current_ != end_) {
1376 Char c = getNextChar();
1377 if (c == '\n')
1378 break;
1379 if (c == '\r') {
1380 // Consume DOS EOL. It will be normalized in addComment.
1381 if (current_ != end_ && *current_ == '\n')
1382 getNextChar();
1383 // Break on Moc OS 9 EOL.
1384 break;
1385 }
1386 }
1387 return true;
1388 }
1389
readNumber(bool checkInf)1390 bool OurReader::readNumber(bool checkInf) {
1391 Location p = current_;
1392 if (checkInf && p != end_ && *p == 'I') {
1393 current_ = ++p;
1394 return false;
1395 }
1396 char c = '0'; // stopgap for already consumed character
1397 // integral part
1398 while (c >= '0' && c <= '9')
1399 c = (current_ = p) < end_ ? *p++ : '\0';
1400 // fractional part
1401 if (c == '.') {
1402 c = (current_ = p) < end_ ? *p++ : '\0';
1403 while (c >= '0' && c <= '9')
1404 c = (current_ = p) < end_ ? *p++ : '\0';
1405 }
1406 // exponential part
1407 if (c == 'e' || c == 'E') {
1408 c = (current_ = p) < end_ ? *p++ : '\0';
1409 if (c == '+' || c == '-')
1410 c = (current_ = p) < end_ ? *p++ : '\0';
1411 while (c >= '0' && c <= '9')
1412 c = (current_ = p) < end_ ? *p++ : '\0';
1413 }
1414 return true;
1415 }
readString()1416 bool OurReader::readString() {
1417 Char c = 0;
1418 while (current_ != end_) {
1419 c = getNextChar();
1420 if (c == '\\')
1421 getNextChar();
1422 else if (c == '"')
1423 break;
1424 }
1425 return c == '"';
1426 }
1427
readStringSingleQuote()1428 bool OurReader::readStringSingleQuote() {
1429 Char c = 0;
1430 while (current_ != end_) {
1431 c = getNextChar();
1432 if (c == '\\')
1433 getNextChar();
1434 else if (c == '\'')
1435 break;
1436 }
1437 return c == '\'';
1438 }
1439
readObject(Token & token)1440 bool OurReader::readObject(Token& token) {
1441 Token tokenName;
1442 String name;
1443 Value init(objectValue);
1444 currentValue().swapPayload(init);
1445 currentValue().setOffsetStart(token.start_ - begin_);
1446 while (readToken(tokenName)) {
1447 bool initialTokenOk = true;
1448 while (tokenName.type_ == tokenComment && initialTokenOk)
1449 initialTokenOk = readToken(tokenName);
1450 if (!initialTokenOk)
1451 break;
1452 if (tokenName.type_ == tokenObjectEnd &&
1453 (name.empty() ||
1454 features_.allowTrailingCommas_)) // empty object or trailing comma
1455 return true;
1456 name.clear();
1457 if (tokenName.type_ == tokenString) {
1458 if (!decodeString(tokenName, name))
1459 return recoverFromError(tokenObjectEnd);
1460 } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
1461 Value numberName;
1462 if (!decodeNumber(tokenName, numberName))
1463 return recoverFromError(tokenObjectEnd);
1464 name = numberName.asString();
1465 } else {
1466 break;
1467 }
1468 if (name.length() >= (1U << 30))
1469 throwRuntimeError("keylength >= 2^30");
1470 if (features_.rejectDupKeys_ && currentValue().isMember(name)) {
1471 String msg = "Duplicate key: '" + name + "'";
1472 return addErrorAndRecover(msg, tokenName, tokenObjectEnd);
1473 }
1474
1475 Token colon;
1476 if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
1477 return addErrorAndRecover("Missing ':' after object member name", colon,
1478 tokenObjectEnd);
1479 }
1480 Value& value = currentValue()[name];
1481 nodes_.push(&value);
1482 bool ok = readValue();
1483 nodes_.pop();
1484 if (!ok) // error already set
1485 return recoverFromError(tokenObjectEnd);
1486
1487 Token comma;
1488 if (!readToken(comma) ||
1489 (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
1490 comma.type_ != tokenComment)) {
1491 return addErrorAndRecover("Missing ',' or '}' in object declaration",
1492 comma, tokenObjectEnd);
1493 }
1494 bool finalizeTokenOk = true;
1495 while (comma.type_ == tokenComment && finalizeTokenOk)
1496 finalizeTokenOk = readToken(comma);
1497 if (comma.type_ == tokenObjectEnd)
1498 return true;
1499 }
1500 return addErrorAndRecover("Missing '}' or object member name", tokenName,
1501 tokenObjectEnd);
1502 }
1503
readArray(Token & token)1504 bool OurReader::readArray(Token& token) {
1505 Value init(arrayValue);
1506 currentValue().swapPayload(init);
1507 currentValue().setOffsetStart(token.start_ - begin_);
1508 int index = 0;
1509 for (;;) {
1510 skipSpaces();
1511 if (current_ != end_ && *current_ == ']' &&
1512 (index == 0 ||
1513 (features_.allowTrailingCommas_ &&
1514 !features_.allowDroppedNullPlaceholders_))) // empty array or trailing
1515 // comma
1516 {
1517 Token endArray;
1518 readToken(endArray);
1519 return true;
1520 }
1521 Value& value = currentValue()[index++];
1522 nodes_.push(&value);
1523 bool ok = readValue();
1524 nodes_.pop();
1525 if (!ok) // error already set
1526 return recoverFromError(tokenArrayEnd);
1527
1528 Token currentToken;
1529 // Accept Comment after last item in the array.
1530 ok = readToken(currentToken);
1531 while (currentToken.type_ == tokenComment && ok) {
1532 ok = readToken(currentToken);
1533 }
1534 bool badTokenType = (currentToken.type_ != tokenArraySeparator &&
1535 currentToken.type_ != tokenArrayEnd);
1536 if (!ok || badTokenType) {
1537 return addErrorAndRecover("Missing ',' or ']' in array declaration",
1538 currentToken, tokenArrayEnd);
1539 }
1540 if (currentToken.type_ == tokenArrayEnd)
1541 break;
1542 }
1543 return true;
1544 }
1545
decodeNumber(Token & token)1546 bool OurReader::decodeNumber(Token& token) {
1547 Value decoded;
1548 if (!decodeNumber(token, decoded))
1549 return false;
1550 currentValue().swapPayload(decoded);
1551 currentValue().setOffsetStart(token.start_ - begin_);
1552 currentValue().setOffsetLimit(token.end_ - begin_);
1553 return true;
1554 }
1555
decodeNumber(Token & token,Value & decoded)1556 bool OurReader::decodeNumber(Token& token, Value& decoded) {
1557 // Attempts to parse the number as an integer. If the number is
1558 // larger than the maximum supported value of an integer then
1559 // we decode the number as a double.
1560 Location current = token.start_;
1561 const bool isNegative = *current == '-';
1562 if (isNegative) {
1563 ++current;
1564 }
1565
1566 // We assume we can represent the largest and smallest integer types as
1567 // unsigned integers with separate sign. This is only true if they can fit
1568 // into an unsigned integer.
1569 static_assert(Value::maxLargestInt <= Value::maxLargestUInt,
1570 "Int must be smaller than UInt");
1571
1572 // We need to convert minLargestInt into a positive number. The easiest way
1573 // to do this conversion is to assume our "threshold" value of minLargestInt
1574 // divided by 10 can fit in maxLargestInt when absolute valued. This should
1575 // be a safe assumption.
1576 static_assert(Value::minLargestInt <= -Value::maxLargestInt,
1577 "The absolute value of minLargestInt must be greater than or "
1578 "equal to maxLargestInt");
1579 static_assert(Value::minLargestInt / 10 >= -Value::maxLargestInt,
1580 "The absolute value of minLargestInt must be only 1 magnitude "
1581 "larger than maxLargest Int");
1582
1583 static constexpr Value::LargestUInt positive_threshold =
1584 Value::maxLargestUInt / 10;
1585 static constexpr Value::UInt positive_last_digit = Value::maxLargestUInt % 10;
1586
1587 // For the negative values, we have to be more careful. Since typically
1588 // -Value::minLargestInt will cause an overflow, we first divide by 10 and
1589 // then take the inverse. This assumes that minLargestInt is only a single
1590 // power of 10 different in magnitude, which we check above. For the last
1591 // digit, we take the modulus before negating for the same reason.
1592 static constexpr auto negative_threshold =
1593 Value::LargestUInt(-(Value::minLargestInt / 10));
1594 static constexpr auto negative_last_digit =
1595 Value::UInt(-(Value::minLargestInt % 10));
1596
1597 const Value::LargestUInt threshold =
1598 isNegative ? negative_threshold : positive_threshold;
1599 const Value::UInt max_last_digit =
1600 isNegative ? negative_last_digit : positive_last_digit;
1601
1602 Value::LargestUInt value = 0;
1603 while (current < token.end_) {
1604 Char c = *current++;
1605 if (c < '0' || c > '9')
1606 return decodeDouble(token, decoded);
1607
1608 const auto digit(static_cast<Value::UInt>(c - '0'));
1609 if (value >= threshold) {
1610 // We've hit or exceeded the max value divided by 10 (rounded down). If
1611 // a) we've only just touched the limit, meaing value == threshold,
1612 // b) this is the last digit, or
1613 // c) it's small enough to fit in that rounding delta, we're okay.
1614 // Otherwise treat this number as a double to avoid overflow.
1615 if (value > threshold || current != token.end_ ||
1616 digit > max_last_digit) {
1617 return decodeDouble(token, decoded);
1618 }
1619 }
1620 value = value * 10 + digit;
1621 }
1622
1623 if (isNegative) {
1624 // We use the same magnitude assumption here, just in case.
1625 const auto last_digit = static_cast<Value::UInt>(value % 10);
1626 decoded = -Value::LargestInt(value / 10) * 10 - last_digit;
1627 } else if (value <= Value::LargestUInt(Value::maxLargestInt)) {
1628 decoded = Value::LargestInt(value);
1629 } else {
1630 decoded = value;
1631 }
1632
1633 return true;
1634 }
1635
decodeDouble(Token & token)1636 bool OurReader::decodeDouble(Token& token) {
1637 Value decoded;
1638 if (!decodeDouble(token, decoded))
1639 return false;
1640 currentValue().swapPayload(decoded);
1641 currentValue().setOffsetStart(token.start_ - begin_);
1642 currentValue().setOffsetLimit(token.end_ - begin_);
1643 return true;
1644 }
1645
decodeDouble(Token & token,Value & decoded)1646 bool OurReader::decodeDouble(Token& token, Value& decoded) {
1647 double value = 0;
1648 const String buffer(token.start_, token.end_);
1649 IStringStream is(buffer);
1650 if (!(is >> value)) {
1651 return addError(
1652 "'" + String(token.start_, token.end_) + "' is not a number.", token);
1653 }
1654 decoded = value;
1655 return true;
1656 }
1657
decodeString(Token & token)1658 bool OurReader::decodeString(Token& token) {
1659 String decoded_string;
1660 if (!decodeString(token, decoded_string))
1661 return false;
1662 Value decoded(decoded_string);
1663 currentValue().swapPayload(decoded);
1664 currentValue().setOffsetStart(token.start_ - begin_);
1665 currentValue().setOffsetLimit(token.end_ - begin_);
1666 return true;
1667 }
1668
decodeString(Token & token,String & decoded)1669 bool OurReader::decodeString(Token& token, String& decoded) {
1670 decoded.reserve(static_cast<size_t>(token.end_ - token.start_ - 2));
1671 Location current = token.start_ + 1; // skip '"'
1672 Location end = token.end_ - 1; // do not include '"'
1673 while (current != end) {
1674 Char c = *current++;
1675 if (c == '"')
1676 break;
1677 if (c == '\\') {
1678 if (current == end)
1679 return addError("Empty escape sequence in string", token, current);
1680 Char escape = *current++;
1681 switch (escape) {
1682 case '"':
1683 decoded += '"';
1684 break;
1685 case '/':
1686 decoded += '/';
1687 break;
1688 case '\\':
1689 decoded += '\\';
1690 break;
1691 case 'b':
1692 decoded += '\b';
1693 break;
1694 case 'f':
1695 decoded += '\f';
1696 break;
1697 case 'n':
1698 decoded += '\n';
1699 break;
1700 case 'r':
1701 decoded += '\r';
1702 break;
1703 case 't':
1704 decoded += '\t';
1705 break;
1706 case 'u': {
1707 unsigned int unicode;
1708 if (!decodeUnicodeCodePoint(token, current, end, unicode))
1709 return false;
1710 decoded += codePointToUTF8(unicode);
1711 } break;
1712 default:
1713 return addError("Bad escape sequence in string", token, current);
1714 }
1715 } else {
1716 decoded += c;
1717 }
1718 }
1719 return true;
1720 }
1721
decodeUnicodeCodePoint(Token & token,Location & current,Location end,unsigned int & unicode)1722 bool OurReader::decodeUnicodeCodePoint(Token& token, Location& current,
1723 Location end, unsigned int& unicode) {
1724
1725 if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
1726 return false;
1727 if (unicode >= 0xD800 && unicode <= 0xDBFF) {
1728 // surrogate pairs
1729 if (end - current < 6)
1730 return addError(
1731 "additional six characters expected to parse unicode surrogate pair.",
1732 token, current);
1733 if (*(current++) == '\\' && *(current++) == 'u') {
1734 unsigned int surrogatePair;
1735 if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
1736 unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
1737 } else
1738 return false;
1739 } else
1740 return addError("expecting another \\u token to begin the second half of "
1741 "a unicode surrogate pair",
1742 token, current);
1743 }
1744 return true;
1745 }
1746
decodeUnicodeEscapeSequence(Token & token,Location & current,Location end,unsigned int & ret_unicode)1747 bool OurReader::decodeUnicodeEscapeSequence(Token& token, Location& current,
1748 Location end,
1749 unsigned int& ret_unicode) {
1750 if (end - current < 4)
1751 return addError(
1752 "Bad unicode escape sequence in string: four digits expected.", token,
1753 current);
1754 int unicode = 0;
1755 for (int index = 0; index < 4; ++index) {
1756 Char c = *current++;
1757 unicode *= 16;
1758 if (c >= '0' && c <= '9')
1759 unicode += c - '0';
1760 else if (c >= 'a' && c <= 'f')
1761 unicode += c - 'a' + 10;
1762 else if (c >= 'A' && c <= 'F')
1763 unicode += c - 'A' + 10;
1764 else
1765 return addError(
1766 "Bad unicode escape sequence in string: hexadecimal digit expected.",
1767 token, current);
1768 }
1769 ret_unicode = static_cast<unsigned int>(unicode);
1770 return true;
1771 }
1772
addError(const String & message,Token & token,Location extra)1773 bool OurReader::addError(const String& message, Token& token, Location extra) {
1774 ErrorInfo info;
1775 info.token_ = token;
1776 info.message_ = message;
1777 info.extra_ = extra;
1778 errors_.push_back(info);
1779 return false;
1780 }
1781
recoverFromError(TokenType skipUntilToken)1782 bool OurReader::recoverFromError(TokenType skipUntilToken) {
1783 size_t errorCount = errors_.size();
1784 Token skip;
1785 for (;;) {
1786 if (!readToken(skip))
1787 errors_.resize(errorCount); // discard errors caused by recovery
1788 if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
1789 break;
1790 }
1791 errors_.resize(errorCount);
1792 return false;
1793 }
1794
addErrorAndRecover(const String & message,Token & token,TokenType skipUntilToken)1795 bool OurReader::addErrorAndRecover(const String& message, Token& token,
1796 TokenType skipUntilToken) {
1797 addError(message, token);
1798 return recoverFromError(skipUntilToken);
1799 }
1800
currentValue()1801 Value& OurReader::currentValue() { return *(nodes_.top()); }
1802
getNextChar()1803 OurReader::Char OurReader::getNextChar() {
1804 if (current_ == end_)
1805 return 0;
1806 return *current_++;
1807 }
1808
getLocationLineAndColumn(Location location,int & line,int & column) const1809 void OurReader::getLocationLineAndColumn(Location location, int& line,
1810 int& column) const {
1811 Location current = begin_;
1812 Location lastLineStart = current;
1813 line = 0;
1814 while (current < location && current != end_) {
1815 Char c = *current++;
1816 if (c == '\r') {
1817 if (*current == '\n')
1818 ++current;
1819 lastLineStart = current;
1820 ++line;
1821 } else if (c == '\n') {
1822 lastLineStart = current;
1823 ++line;
1824 }
1825 }
1826 // column & line start at 1
1827 column = int(location - lastLineStart) + 1;
1828 ++line;
1829 }
1830
getLocationLineAndColumn(Location location) const1831 String OurReader::getLocationLineAndColumn(Location location) const {
1832 int line, column;
1833 getLocationLineAndColumn(location, line, column);
1834 char buffer[18 + 16 + 16 + 1];
1835 jsoncpp_snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
1836 return buffer;
1837 }
1838
getFormattedErrorMessages() const1839 String OurReader::getFormattedErrorMessages() const {
1840 String formattedMessage;
1841 for (const auto& error : errors_) {
1842 formattedMessage +=
1843 "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
1844 formattedMessage += " " + error.message_ + "\n";
1845 if (error.extra_)
1846 formattedMessage +=
1847 "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
1848 }
1849 return formattedMessage;
1850 }
1851
getStructuredErrors() const1852 std::vector<OurReader::StructuredError> OurReader::getStructuredErrors() const {
1853 std::vector<OurReader::StructuredError> allErrors;
1854 for (const auto& error : errors_) {
1855 OurReader::StructuredError structured;
1856 structured.offset_start = error.token_.start_ - begin_;
1857 structured.offset_limit = error.token_.end_ - begin_;
1858 structured.message = error.message_;
1859 allErrors.push_back(structured);
1860 }
1861 return allErrors;
1862 }
1863
1864 class OurCharReader : public CharReader {
1865 bool const collectComments_;
1866 OurReader reader_;
1867
1868 public:
OurCharReader(bool collectComments,OurFeatures const & features)1869 OurCharReader(bool collectComments, OurFeatures const& features)
1870 : collectComments_(collectComments), reader_(features) {}
parse(char const * beginDoc,char const * endDoc,Value * root,String * errs)1871 bool parse(char const* beginDoc, char const* endDoc, Value* root,
1872 String* errs) override {
1873 bool ok = reader_.parse(beginDoc, endDoc, *root, collectComments_);
1874 if (errs) {
1875 *errs = reader_.getFormattedErrorMessages();
1876 }
1877 return ok;
1878 }
1879 };
1880
CharReaderBuilder()1881 CharReaderBuilder::CharReaderBuilder() { setDefaults(&settings_); }
1882 CharReaderBuilder::~CharReaderBuilder() = default;
newCharReader() const1883 CharReader* CharReaderBuilder::newCharReader() const {
1884 bool collectComments = settings_["collectComments"].asBool();
1885 OurFeatures features = OurFeatures::all();
1886 features.allowComments_ = settings_["allowComments"].asBool();
1887 features.allowTrailingCommas_ = settings_["allowTrailingCommas"].asBool();
1888 features.strictRoot_ = settings_["strictRoot"].asBool();
1889 features.allowDroppedNullPlaceholders_ =
1890 settings_["allowDroppedNullPlaceholders"].asBool();
1891 features.allowNumericKeys_ = settings_["allowNumericKeys"].asBool();
1892 features.allowSingleQuotes_ = settings_["allowSingleQuotes"].asBool();
1893
1894 // Stack limit is always a size_t, so we get this as an unsigned int
1895 // regardless of it we have 64-bit integer support enabled.
1896 features.stackLimit_ = static_cast<size_t>(settings_["stackLimit"].asUInt());
1897 features.failIfExtra_ = settings_["failIfExtra"].asBool();
1898 features.rejectDupKeys_ = settings_["rejectDupKeys"].asBool();
1899 features.allowSpecialFloats_ = settings_["allowSpecialFloats"].asBool();
1900 features.skipBom_ = settings_["skipBom"].asBool();
1901 return new OurCharReader(collectComments, features);
1902 }
1903
validate(Json::Value * invalid) const1904 bool CharReaderBuilder::validate(Json::Value* invalid) const {
1905 static const auto& valid_keys = *new std::set<String>{
1906 "collectComments",
1907 "allowComments",
1908 "allowTrailingCommas",
1909 "strictRoot",
1910 "allowDroppedNullPlaceholders",
1911 "allowNumericKeys",
1912 "allowSingleQuotes",
1913 "stackLimit",
1914 "failIfExtra",
1915 "rejectDupKeys",
1916 "allowSpecialFloats",
1917 "skipBom",
1918 };
1919 for (auto si = settings_.begin(); si != settings_.end(); ++si) {
1920 auto key = si.name();
1921 if (valid_keys.count(key))
1922 continue;
1923 if (invalid)
1924 (*invalid)[std::move(key)] = *si;
1925 else
1926 return false;
1927 }
1928 return invalid ? invalid->empty() : true;
1929 }
1930
operator [](const String & key)1931 Value& CharReaderBuilder::operator[](const String& key) {
1932 return settings_[key];
1933 }
1934 // static
strictMode(Json::Value * settings)1935 void CharReaderBuilder::strictMode(Json::Value* settings) {
1936 //! [CharReaderBuilderStrictMode]
1937 (*settings)["allowComments"] = false;
1938 (*settings)["allowTrailingCommas"] = false;
1939 (*settings)["strictRoot"] = true;
1940 (*settings)["allowDroppedNullPlaceholders"] = false;
1941 (*settings)["allowNumericKeys"] = false;
1942 (*settings)["allowSingleQuotes"] = false;
1943 (*settings)["stackLimit"] = 1000;
1944 (*settings)["failIfExtra"] = true;
1945 (*settings)["rejectDupKeys"] = true;
1946 (*settings)["allowSpecialFloats"] = false;
1947 (*settings)["skipBom"] = true;
1948 //! [CharReaderBuilderStrictMode]
1949 }
1950 // static
setDefaults(Json::Value * settings)1951 void CharReaderBuilder::setDefaults(Json::Value* settings) {
1952 //! [CharReaderBuilderDefaults]
1953 (*settings)["collectComments"] = true;
1954 (*settings)["allowComments"] = true;
1955 (*settings)["allowTrailingCommas"] = true;
1956 (*settings)["strictRoot"] = false;
1957 (*settings)["allowDroppedNullPlaceholders"] = false;
1958 (*settings)["allowNumericKeys"] = false;
1959 (*settings)["allowSingleQuotes"] = false;
1960 (*settings)["stackLimit"] = 1000;
1961 (*settings)["failIfExtra"] = false;
1962 (*settings)["rejectDupKeys"] = false;
1963 (*settings)["allowSpecialFloats"] = false;
1964 (*settings)["skipBom"] = true;
1965 //! [CharReaderBuilderDefaults]
1966 }
1967
1968 //////////////////////////////////
1969 // global functions
1970
parseFromStream(CharReader::Factory const & fact,IStream & sin,Value * root,String * errs)1971 bool parseFromStream(CharReader::Factory const& fact, IStream& sin, Value* root,
1972 String* errs) {
1973 OStringStream ssin;
1974 ssin << sin.rdbuf();
1975 String doc = ssin.str();
1976 char const* begin = doc.data();
1977 char const* end = begin + doc.size();
1978 // Note that we do not actually need a null-terminator.
1979 CharReaderPtr const reader(fact.newCharReader());
1980 return reader->parse(begin, end, root, errs);
1981 }
1982
operator >>(IStream & sin,Value & root)1983 IStream& operator>>(IStream& sin, Value& root) {
1984 CharReaderBuilder b;
1985 String errs;
1986 bool ok = parseFromStream(b, sin, &root, &errs);
1987 if (!ok) {
1988 throwRuntimeError(errs);
1989 }
1990 return sin;
1991 }
1992
1993 } // namespace Json
1994