1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/json/json_parser.h"
6
7 #include <cmath>
8 #include <utility>
9
10 #include "base/logging.h"
11 #include "base/macros.h"
12 #include "base/memory/ptr_util.h"
13 #include "base/strings/string_number_conversions.h"
14 #include "base/strings/string_piece.h"
15 #include "base/strings/string_util.h"
16 #include "base/strings/stringprintf.h"
17 #include "base/strings/utf_string_conversion_utils.h"
18 #include "base/strings/utf_string_conversions.h"
19 #include "base/third_party/icu/icu_utf.h"
20 #include "base/values.h"
21
22 namespace base {
23 namespace internal {
24
25 namespace {
26
// Nesting limit for lists and dictionaries. StackMarker::IsTooDeep() reports
// true once the tracked depth reaches this value.
const int kStackMaxDepth = 100;

// First code point outside 7-bit ASCII. Code points below this are appended
// to a string as a single char; code points at or above it are encoded as a
// multi-byte UTF-8 sequence.
const int32_t kExtendedASCIIStart = 0x80;
30
31 // DictionaryHiddenRootValue and ListHiddenRootValue are used in conjunction
32 // with JSONStringValue as an optimization for reducing the number of string
33 // copies. When this optimization is active, the parser uses a hidden root to
34 // keep the original JSON input string live and creates JSONStringValue children
35 // holding StringPiece references to the input string, avoiding about 2/3rds of
36 // string memory copies. The real root value is Swap()ed into the new instance.
37 class DictionaryHiddenRootValue : public DictionaryValue {
38 public:
DictionaryHiddenRootValue(std::unique_ptr<std::string> json,std::unique_ptr<Value> root)39 DictionaryHiddenRootValue(std::unique_ptr<std::string> json,
40 std::unique_ptr<Value> root)
41 : json_(std::move(json)) {
42 DCHECK(root->IsType(Value::TYPE_DICTIONARY));
43 DictionaryValue::Swap(static_cast<DictionaryValue*>(root.get()));
44 }
45
Swap(DictionaryValue * other)46 void Swap(DictionaryValue* other) override {
47 DVLOG(1) << "Swap()ing a DictionaryValue inefficiently.";
48
49 // First deep copy to convert JSONStringValue to std::string and swap that
50 // copy with |other|, which contains the new contents of |this|.
51 std::unique_ptr<DictionaryValue> copy(CreateDeepCopy());
52 copy->Swap(other);
53
54 // Then erase the contents of the current dictionary and swap in the
55 // new contents, originally from |other|.
56 Clear();
57 json_.reset();
58 DictionaryValue::Swap(copy.get());
59 }
60
61 // Not overriding DictionaryValue::Remove because it just calls through to
62 // the method below.
63
RemoveWithoutPathExpansion(const std::string & key,std::unique_ptr<Value> * out)64 bool RemoveWithoutPathExpansion(const std::string& key,
65 std::unique_ptr<Value>* out) override {
66 // If the caller won't take ownership of the removed value, just call up.
67 if (!out)
68 return DictionaryValue::RemoveWithoutPathExpansion(key, out);
69
70 DVLOG(1) << "Remove()ing from a DictionaryValue inefficiently.";
71
72 // Otherwise, remove the value while its still "owned" by this and copy it
73 // to convert any JSONStringValues to std::string.
74 std::unique_ptr<Value> out_owned;
75 if (!DictionaryValue::RemoveWithoutPathExpansion(key, &out_owned))
76 return false;
77
78 *out = out_owned->CreateDeepCopy();
79
80 return true;
81 }
82
83 private:
84 std::unique_ptr<std::string> json_;
85
86 DISALLOW_COPY_AND_ASSIGN(DictionaryHiddenRootValue);
87 };
88
89 class ListHiddenRootValue : public ListValue {
90 public:
ListHiddenRootValue(std::unique_ptr<std::string> json,std::unique_ptr<Value> root)91 ListHiddenRootValue(std::unique_ptr<std::string> json,
92 std::unique_ptr<Value> root)
93 : json_(std::move(json)) {
94 DCHECK(root->IsType(Value::TYPE_LIST));
95 ListValue::Swap(static_cast<ListValue*>(root.get()));
96 }
97
Swap(ListValue * other)98 void Swap(ListValue* other) override {
99 DVLOG(1) << "Swap()ing a ListValue inefficiently.";
100
101 // First deep copy to convert JSONStringValue to std::string and swap that
102 // copy with |other|, which contains the new contents of |this|.
103 std::unique_ptr<ListValue> copy(CreateDeepCopy());
104 copy->Swap(other);
105
106 // Then erase the contents of the current list and swap in the new contents,
107 // originally from |other|.
108 Clear();
109 json_.reset();
110 ListValue::Swap(copy.get());
111 }
112
Remove(size_t index,std::unique_ptr<Value> * out)113 bool Remove(size_t index, std::unique_ptr<Value>* out) override {
114 // If the caller won't take ownership of the removed value, just call up.
115 if (!out)
116 return ListValue::Remove(index, out);
117
118 DVLOG(1) << "Remove()ing from a ListValue inefficiently.";
119
120 // Otherwise, remove the value while its still "owned" by this and copy it
121 // to convert any JSONStringValues to std::string.
122 std::unique_ptr<Value> out_owned;
123 if (!ListValue::Remove(index, &out_owned))
124 return false;
125
126 *out = out_owned->CreateDeepCopy();
127
128 return true;
129 }
130
131 private:
132 std::unique_ptr<std::string> json_;
133
134 DISALLOW_COPY_AND_ASSIGN(ListHiddenRootValue);
135 };
136
137 // A variant on StringValue that uses StringPiece instead of copying the string
138 // into the Value. This can only be stored in a child of hidden root (above),
139 // otherwise the referenced string will not be guaranteed to outlive it.
140 class JSONStringValue : public Value {
141 public:
JSONStringValue(StringPiece piece)142 explicit JSONStringValue(StringPiece piece)
143 : Value(TYPE_STRING), string_piece_(piece) {}
144
145 // Overridden from Value:
GetAsString(std::string * out_value) const146 bool GetAsString(std::string* out_value) const override {
147 string_piece_.CopyToString(out_value);
148 return true;
149 }
GetAsString(string16 * out_value) const150 bool GetAsString(string16* out_value) const override {
151 *out_value = UTF8ToUTF16(string_piece_);
152 return true;
153 }
DeepCopy() const154 Value* DeepCopy() const override {
155 return new StringValue(string_piece_.as_string());
156 }
Equals(const Value * other) const157 bool Equals(const Value* other) const override {
158 std::string other_string;
159 return other->IsType(TYPE_STRING) && other->GetAsString(&other_string) &&
160 StringPiece(other_string) == string_piece_;
161 }
162
163 private:
164 // The location in the original input stream.
165 StringPiece string_piece_;
166
167 DISALLOW_COPY_AND_ASSIGN(JSONStringValue);
168 };
169
170 // Simple class that checks for maximum recursion/"stack overflow."
171 class StackMarker {
172 public:
StackMarker(int * depth)173 explicit StackMarker(int* depth) : depth_(depth) {
174 ++(*depth_);
175 DCHECK_LE(*depth_, kStackMaxDepth);
176 }
~StackMarker()177 ~StackMarker() {
178 --(*depth_);
179 }
180
IsTooDeep() const181 bool IsTooDeep() const {
182 return *depth_ >= kStackMaxDepth;
183 }
184
185 private:
186 int* const depth_;
187
188 DISALLOW_COPY_AND_ASSIGN(StackMarker);
189 };
190
191 } // namespace
192
JSONParser(int options)193 JSONParser::JSONParser(int options)
194 : options_(options),
195 start_pos_(nullptr),
196 pos_(nullptr),
197 end_pos_(nullptr),
198 index_(0),
199 stack_depth_(0),
200 line_number_(0),
201 index_last_line_(0),
202 error_code_(JSONReader::JSON_NO_ERROR),
203 error_line_(0),
204 error_column_(0) {
205 }
206
~JSONParser()207 JSONParser::~JSONParser() {
208 }
209
Parse(StringPiece input)210 std::unique_ptr<Value> JSONParser::Parse(StringPiece input) {
211 std::unique_ptr<std::string> input_copy;
212 // If the children of a JSON root can be detached, then hidden roots cannot
213 // be used, so do not bother copying the input because StringPiece will not
214 // be used anywhere.
215 if (!(options_ & JSON_DETACHABLE_CHILDREN)) {
216 input_copy = MakeUnique<std::string>(input.as_string());
217 start_pos_ = input_copy->data();
218 } else {
219 start_pos_ = input.data();
220 }
221 pos_ = start_pos_;
222 end_pos_ = start_pos_ + input.length();
223 index_ = 0;
224 line_number_ = 1;
225 index_last_line_ = 0;
226
227 error_code_ = JSONReader::JSON_NO_ERROR;
228 error_line_ = 0;
229 error_column_ = 0;
230
231 // When the input JSON string starts with a UTF-8 Byte-Order-Mark
232 // <0xEF 0xBB 0xBF>, advance the start position to avoid the
233 // ParseNextToken function mis-treating a Unicode BOM as an invalid
234 // character and returning NULL.
235 if (CanConsume(3) && static_cast<uint8_t>(*pos_) == 0xEF &&
236 static_cast<uint8_t>(*(pos_ + 1)) == 0xBB &&
237 static_cast<uint8_t>(*(pos_ + 2)) == 0xBF) {
238 NextNChars(3);
239 }
240
241 // Parse the first and any nested tokens.
242 std::unique_ptr<Value> root(ParseNextToken());
243 if (!root)
244 return nullptr;
245
246 // Make sure the input stream is at an end.
247 if (GetNextToken() != T_END_OF_INPUT) {
248 if (!CanConsume(1) || (NextChar() && GetNextToken() != T_END_OF_INPUT)) {
249 ReportError(JSONReader::JSON_UNEXPECTED_DATA_AFTER_ROOT, 1);
250 return nullptr;
251 }
252 }
253
254 // Dictionaries and lists can contain JSONStringValues, so wrap them in a
255 // hidden root.
256 if (!(options_ & JSON_DETACHABLE_CHILDREN)) {
257 if (root->IsType(Value::TYPE_DICTIONARY)) {
258 return MakeUnique<DictionaryHiddenRootValue>(std::move(input_copy),
259 std::move(root));
260 }
261 if (root->IsType(Value::TYPE_LIST)) {
262 return MakeUnique<ListHiddenRootValue>(std::move(input_copy),
263 std::move(root));
264 }
265 if (root->IsType(Value::TYPE_STRING)) {
266 // A string type could be a JSONStringValue, but because there's no
267 // corresponding HiddenRootValue, the memory will be lost. Deep copy to
268 // preserve it.
269 return root->CreateDeepCopy();
270 }
271 }
272
273 // All other values can be returned directly.
274 return root;
275 }
276
error_code() const277 JSONReader::JsonParseError JSONParser::error_code() const {
278 return error_code_;
279 }
280
GetErrorMessage() const281 std::string JSONParser::GetErrorMessage() const {
282 return FormatErrorMessage(error_line_, error_column_,
283 JSONReader::ErrorCodeToString(error_code_));
284 }
285
error_line() const286 int JSONParser::error_line() const {
287 return error_line_;
288 }
289
error_column() const290 int JSONParser::error_column() const {
291 return error_column_;
292 }
293
294 // StringBuilder ///////////////////////////////////////////////////////////////
295
StringBuilder()296 JSONParser::StringBuilder::StringBuilder() : StringBuilder(nullptr) {}
297
StringBuilder(const char * pos)298 JSONParser::StringBuilder::StringBuilder(const char* pos)
299 : pos_(pos),
300 length_(0),
301 string_(nullptr) {
302 }
303
Swap(StringBuilder * other)304 void JSONParser::StringBuilder::Swap(StringBuilder* other) {
305 std::swap(other->string_, string_);
306 std::swap(other->pos_, pos_);
307 std::swap(other->length_, length_);
308 }
309
~StringBuilder()310 JSONParser::StringBuilder::~StringBuilder() {
311 delete string_;
312 }
313
Append(const char & c)314 void JSONParser::StringBuilder::Append(const char& c) {
315 DCHECK_GE(c, 0);
316 DCHECK_LT(static_cast<unsigned char>(c), 128);
317
318 if (string_)
319 string_->push_back(c);
320 else
321 ++length_;
322 }
323
AppendString(const std::string & str)324 void JSONParser::StringBuilder::AppendString(const std::string& str) {
325 DCHECK(string_);
326 string_->append(str);
327 }
328
Convert()329 void JSONParser::StringBuilder::Convert() {
330 if (string_)
331 return;
332 string_ = new std::string(pos_, length_);
333 }
334
CanBeStringPiece() const335 bool JSONParser::StringBuilder::CanBeStringPiece() const {
336 return !string_;
337 }
338
AsStringPiece()339 StringPiece JSONParser::StringBuilder::AsStringPiece() {
340 if (string_)
341 return StringPiece();
342 return StringPiece(pos_, length_);
343 }
344
AsString()345 const std::string& JSONParser::StringBuilder::AsString() {
346 if (!string_)
347 Convert();
348 return *string_;
349 }
350
351 // JSONParser private //////////////////////////////////////////////////////////
352
CanConsume(int length)353 inline bool JSONParser::CanConsume(int length) {
354 return pos_ + length <= end_pos_;
355 }
356
NextChar()357 const char* JSONParser::NextChar() {
358 DCHECK(CanConsume(1));
359 ++index_;
360 ++pos_;
361 return pos_;
362 }
363
NextNChars(int n)364 void JSONParser::NextNChars(int n) {
365 DCHECK(CanConsume(n));
366 index_ += n;
367 pos_ += n;
368 }
369
GetNextToken()370 JSONParser::Token JSONParser::GetNextToken() {
371 EatWhitespaceAndComments();
372 if (!CanConsume(1))
373 return T_END_OF_INPUT;
374
375 switch (*pos_) {
376 case '{':
377 return T_OBJECT_BEGIN;
378 case '}':
379 return T_OBJECT_END;
380 case '[':
381 return T_ARRAY_BEGIN;
382 case ']':
383 return T_ARRAY_END;
384 case '"':
385 return T_STRING;
386 case '0':
387 case '1':
388 case '2':
389 case '3':
390 case '4':
391 case '5':
392 case '6':
393 case '7':
394 case '8':
395 case '9':
396 case '-':
397 return T_NUMBER;
398 case 't':
399 return T_BOOL_TRUE;
400 case 'f':
401 return T_BOOL_FALSE;
402 case 'n':
403 return T_NULL;
404 case ',':
405 return T_LIST_SEPARATOR;
406 case ':':
407 return T_OBJECT_PAIR_SEPARATOR;
408 default:
409 return T_INVALID_TOKEN;
410 }
411 }
412
EatWhitespaceAndComments()413 void JSONParser::EatWhitespaceAndComments() {
414 while (pos_ < end_pos_) {
415 switch (*pos_) {
416 case '\r':
417 case '\n':
418 index_last_line_ = index_;
419 // Don't increment line_number_ twice for "\r\n".
420 if (!(*pos_ == '\n' && pos_ > start_pos_ && *(pos_ - 1) == '\r'))
421 ++line_number_;
422 // Fall through.
423 case ' ':
424 case '\t':
425 NextChar();
426 break;
427 case '/':
428 if (!EatComment())
429 return;
430 break;
431 default:
432 return;
433 }
434 }
435 }
436
EatComment()437 bool JSONParser::EatComment() {
438 if (*pos_ != '/' || !CanConsume(1))
439 return false;
440
441 char next_char = *NextChar();
442 if (next_char == '/') {
443 // Single line comment, read to newline.
444 while (CanConsume(1)) {
445 next_char = *NextChar();
446 if (next_char == '\n' || next_char == '\r')
447 return true;
448 }
449 } else if (next_char == '*') {
450 char previous_char = '\0';
451 // Block comment, read until end marker.
452 while (CanConsume(1)) {
453 next_char = *NextChar();
454 if (previous_char == '*' && next_char == '/') {
455 // EatWhitespaceAndComments will inspect pos_, which will still be on
456 // the last / of the comment, so advance once more (which may also be
457 // end of input).
458 NextChar();
459 return true;
460 }
461 previous_char = next_char;
462 }
463
464 // If the comment is unterminated, GetNextToken will report T_END_OF_INPUT.
465 }
466
467 return false;
468 }
469
ParseNextToken()470 Value* JSONParser::ParseNextToken() {
471 return ParseToken(GetNextToken());
472 }
473
ParseToken(Token token)474 Value* JSONParser::ParseToken(Token token) {
475 switch (token) {
476 case T_OBJECT_BEGIN:
477 return ConsumeDictionary();
478 case T_ARRAY_BEGIN:
479 return ConsumeList();
480 case T_STRING:
481 return ConsumeString();
482 case T_NUMBER:
483 return ConsumeNumber();
484 case T_BOOL_TRUE:
485 case T_BOOL_FALSE:
486 case T_NULL:
487 return ConsumeLiteral();
488 default:
489 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
490 return nullptr;
491 }
492 }
493
ConsumeDictionary()494 Value* JSONParser::ConsumeDictionary() {
495 if (*pos_ != '{') {
496 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
497 return nullptr;
498 }
499
500 StackMarker depth_check(&stack_depth_);
501 if (depth_check.IsTooDeep()) {
502 ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1);
503 return nullptr;
504 }
505
506 std::unique_ptr<DictionaryValue> dict(new DictionaryValue);
507
508 NextChar();
509 Token token = GetNextToken();
510 while (token != T_OBJECT_END) {
511 if (token != T_STRING) {
512 ReportError(JSONReader::JSON_UNQUOTED_DICTIONARY_KEY, 1);
513 return nullptr;
514 }
515
516 // First consume the key.
517 StringBuilder key;
518 if (!ConsumeStringRaw(&key)) {
519 return nullptr;
520 }
521
522 // Read the separator.
523 NextChar();
524 token = GetNextToken();
525 if (token != T_OBJECT_PAIR_SEPARATOR) {
526 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
527 return nullptr;
528 }
529
530 // The next token is the value. Ownership transfers to |dict|.
531 NextChar();
532 Value* value = ParseNextToken();
533 if (!value) {
534 // ReportError from deeper level.
535 return nullptr;
536 }
537
538 dict->SetWithoutPathExpansion(key.AsString(), value);
539
540 NextChar();
541 token = GetNextToken();
542 if (token == T_LIST_SEPARATOR) {
543 NextChar();
544 token = GetNextToken();
545 if (token == T_OBJECT_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
546 ReportError(JSONReader::JSON_TRAILING_COMMA, 1);
547 return nullptr;
548 }
549 } else if (token != T_OBJECT_END) {
550 ReportError(JSONReader::JSON_SYNTAX_ERROR, 0);
551 return nullptr;
552 }
553 }
554
555 return dict.release();
556 }
557
ConsumeList()558 Value* JSONParser::ConsumeList() {
559 if (*pos_ != '[') {
560 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
561 return nullptr;
562 }
563
564 StackMarker depth_check(&stack_depth_);
565 if (depth_check.IsTooDeep()) {
566 ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1);
567 return nullptr;
568 }
569
570 std::unique_ptr<ListValue> list(new ListValue);
571
572 NextChar();
573 Token token = GetNextToken();
574 while (token != T_ARRAY_END) {
575 Value* item = ParseToken(token);
576 if (!item) {
577 // ReportError from deeper level.
578 return nullptr;
579 }
580
581 list->Append(item);
582
583 NextChar();
584 token = GetNextToken();
585 if (token == T_LIST_SEPARATOR) {
586 NextChar();
587 token = GetNextToken();
588 if (token == T_ARRAY_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
589 ReportError(JSONReader::JSON_TRAILING_COMMA, 1);
590 return nullptr;
591 }
592 } else if (token != T_ARRAY_END) {
593 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
594 return nullptr;
595 }
596 }
597
598 return list.release();
599 }
600
ConsumeString()601 Value* JSONParser::ConsumeString() {
602 StringBuilder string;
603 if (!ConsumeStringRaw(&string))
604 return nullptr;
605
606 // Create the Value representation, using a hidden root, if configured
607 // to do so, and if the string can be represented by StringPiece.
608 if (string.CanBeStringPiece() && !(options_ & JSON_DETACHABLE_CHILDREN))
609 return new JSONStringValue(string.AsStringPiece());
610
611 if (string.CanBeStringPiece())
612 string.Convert();
613 return new StringValue(string.AsString());
614 }
615
ConsumeStringRaw(StringBuilder * out)616 bool JSONParser::ConsumeStringRaw(StringBuilder* out) {
617 if (*pos_ != '"') {
618 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
619 return false;
620 }
621
622 // StringBuilder will internally build a StringPiece unless a UTF-16
623 // conversion occurs, at which point it will perform a copy into a
624 // std::string.
625 StringBuilder string(NextChar());
626
627 int length = end_pos_ - start_pos_;
628 int32_t next_char = 0;
629
630 while (CanConsume(1)) {
631 pos_ = start_pos_ + index_; // CBU8_NEXT is postcrement.
632 CBU8_NEXT(start_pos_, index_, length, next_char);
633 if (next_char < 0 || !IsValidCharacter(next_char)) {
634 ReportError(JSONReader::JSON_UNSUPPORTED_ENCODING, 1);
635 return false;
636 }
637
638 if (next_char == '"') {
639 --index_; // Rewind by one because of CBU8_NEXT.
640 out->Swap(&string);
641 return true;
642 }
643
644 // If this character is not an escape sequence...
645 if (next_char != '\\') {
646 if (next_char < kExtendedASCIIStart)
647 string.Append(static_cast<char>(next_char));
648 else
649 DecodeUTF8(next_char, &string);
650 } else {
651 // And if it is an escape sequence, the input string will be adjusted
652 // (either by combining the two characters of an encoded escape sequence,
653 // or with a UTF conversion), so using StringPiece isn't possible -- force
654 // a conversion.
655 string.Convert();
656
657 if (!CanConsume(1)) {
658 ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
659 return false;
660 }
661
662 switch (*NextChar()) {
663 // Allowed esape sequences:
664 case 'x': { // UTF-8 sequence.
665 // UTF-8 \x escape sequences are not allowed in the spec, but they
666 // are supported here for backwards-compatiblity with the old parser.
667 if (!CanConsume(2)) {
668 ReportError(JSONReader::JSON_INVALID_ESCAPE, 1);
669 return false;
670 }
671
672 int hex_digit = 0;
673 if (!HexStringToInt(StringPiece(NextChar(), 2), &hex_digit)) {
674 ReportError(JSONReader::JSON_INVALID_ESCAPE, -1);
675 return false;
676 }
677 NextChar();
678
679 if (hex_digit < kExtendedASCIIStart)
680 string.Append(static_cast<char>(hex_digit));
681 else
682 DecodeUTF8(hex_digit, &string);
683 break;
684 }
685 case 'u': { // UTF-16 sequence.
686 // UTF units are of the form \uXXXX.
687 if (!CanConsume(5)) { // 5 being 'u' and four HEX digits.
688 ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
689 return false;
690 }
691
692 // Skip the 'u'.
693 NextChar();
694
695 std::string utf8_units;
696 if (!DecodeUTF16(&utf8_units)) {
697 ReportError(JSONReader::JSON_INVALID_ESCAPE, -1);
698 return false;
699 }
700
701 string.AppendString(utf8_units);
702 break;
703 }
704 case '"':
705 string.Append('"');
706 break;
707 case '\\':
708 string.Append('\\');
709 break;
710 case '/':
711 string.Append('/');
712 break;
713 case 'b':
714 string.Append('\b');
715 break;
716 case 'f':
717 string.Append('\f');
718 break;
719 case 'n':
720 string.Append('\n');
721 break;
722 case 'r':
723 string.Append('\r');
724 break;
725 case 't':
726 string.Append('\t');
727 break;
728 case 'v': // Not listed as valid escape sequence in the RFC.
729 string.Append('\v');
730 break;
731 // All other escape squences are illegal.
732 default:
733 ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
734 return false;
735 }
736 }
737 }
738
739 ReportError(JSONReader::JSON_SYNTAX_ERROR, 0);
740 return false;
741 }
742
743 // Entry is at the first X in \uXXXX.
DecodeUTF16(std::string * dest_string)744 bool JSONParser::DecodeUTF16(std::string* dest_string) {
745 if (!CanConsume(4))
746 return false;
747
748 // This is a 32-bit field because the shift operations in the
749 // conversion process below cause MSVC to error about "data loss."
750 // This only stores UTF-16 code units, though.
751 // Consume the UTF-16 code unit, which may be a high surrogate.
752 int code_unit16_high = 0;
753 if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_high))
754 return false;
755
756 // Only add 3, not 4, because at the end of this iteration, the parser has
757 // finished working with the last digit of the UTF sequence, meaning that
758 // the next iteration will advance to the next byte.
759 NextNChars(3);
760
761 // Used to convert the UTF-16 code units to a code point and then to a UTF-8
762 // code unit sequence.
763 char code_unit8[8] = { 0 };
764 size_t offset = 0;
765
766 // If this is a high surrogate, consume the next code unit to get the
767 // low surrogate.
768 if (CBU16_IS_SURROGATE(code_unit16_high)) {
769 // Make sure this is the high surrogate. If not, it's an encoding
770 // error.
771 if (!CBU16_IS_SURROGATE_LEAD(code_unit16_high))
772 return false;
773
774 // Make sure that the token has more characters to consume the
775 // lower surrogate.
776 if (!CanConsume(6)) // 6 being '\' 'u' and four HEX digits.
777 return false;
778 if (*NextChar() != '\\' || *NextChar() != 'u')
779 return false;
780
781 NextChar(); // Read past 'u'.
782 int code_unit16_low = 0;
783 if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_low))
784 return false;
785
786 NextNChars(3);
787
788 if (!CBU16_IS_TRAIL(code_unit16_low)) {
789 return false;
790 }
791
792 uint32_t code_point =
793 CBU16_GET_SUPPLEMENTARY(code_unit16_high, code_unit16_low);
794 if (!IsValidCharacter(code_point))
795 return false;
796
797 offset = 0;
798 CBU8_APPEND_UNSAFE(code_unit8, offset, code_point);
799 } else {
800 // Not a surrogate.
801 DCHECK(CBU16_IS_SINGLE(code_unit16_high));
802 if (!IsValidCharacter(code_unit16_high))
803 return false;
804
805 CBU8_APPEND_UNSAFE(code_unit8, offset, code_unit16_high);
806 }
807
808 dest_string->append(code_unit8);
809 return true;
810 }
811
DecodeUTF8(const int32_t & point,StringBuilder * dest)812 void JSONParser::DecodeUTF8(const int32_t& point, StringBuilder* dest) {
813 DCHECK(IsValidCharacter(point));
814
815 // Anything outside of the basic ASCII plane will need to be decoded from
816 // int32_t to a multi-byte sequence.
817 if (point < kExtendedASCIIStart) {
818 dest->Append(static_cast<char>(point));
819 } else {
820 char utf8_units[4] = { 0 };
821 int offset = 0;
822 CBU8_APPEND_UNSAFE(utf8_units, offset, point);
823 dest->Convert();
824 // CBU8_APPEND_UNSAFE can overwrite up to 4 bytes, so utf8_units may not be
825 // zero terminated at this point. |offset| contains the correct length.
826 dest->AppendString(std::string(utf8_units, offset));
827 }
828 }
829
ConsumeNumber()830 Value* JSONParser::ConsumeNumber() {
831 const char* num_start = pos_;
832 const int start_index = index_;
833 int end_index = start_index;
834
835 if (*pos_ == '-')
836 NextChar();
837
838 if (!ReadInt(false)) {
839 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
840 return nullptr;
841 }
842 end_index = index_;
843
844 // The optional fraction part.
845 if (*pos_ == '.') {
846 if (!CanConsume(1)) {
847 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
848 return nullptr;
849 }
850 NextChar();
851 if (!ReadInt(true)) {
852 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
853 return nullptr;
854 }
855 end_index = index_;
856 }
857
858 // Optional exponent part.
859 if (*pos_ == 'e' || *pos_ == 'E') {
860 NextChar();
861 if (*pos_ == '-' || *pos_ == '+')
862 NextChar();
863 if (!ReadInt(true)) {
864 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
865 return nullptr;
866 }
867 end_index = index_;
868 }
869
870 // ReadInt is greedy because numbers have no easily detectable sentinel,
871 // so save off where the parser should be on exit (see Consume invariant at
872 // the top of the header), then make sure the next token is one which is
873 // valid.
874 const char* exit_pos = pos_ - 1;
875 int exit_index = index_ - 1;
876
877 switch (GetNextToken()) {
878 case T_OBJECT_END:
879 case T_ARRAY_END:
880 case T_LIST_SEPARATOR:
881 case T_END_OF_INPUT:
882 break;
883 default:
884 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
885 return nullptr;
886 }
887
888 pos_ = exit_pos;
889 index_ = exit_index;
890
891 StringPiece num_string(num_start, end_index - start_index);
892
893 int num_int;
894 if (StringToInt(num_string, &num_int))
895 return new FundamentalValue(num_int);
896
897 double num_double;
898 if (StringToDouble(num_string.as_string(), &num_double) &&
899 std::isfinite(num_double)) {
900 return new FundamentalValue(num_double);
901 }
902
903 return nullptr;
904 }
905
ReadInt(bool allow_leading_zeros)906 bool JSONParser::ReadInt(bool allow_leading_zeros) {
907 char first = *pos_;
908 int len = 0;
909
910 char c = first;
911 while (CanConsume(1) && IsAsciiDigit(c)) {
912 c = *NextChar();
913 ++len;
914 }
915
916 if (len == 0)
917 return false;
918
919 if (!allow_leading_zeros && len > 1 && first == '0')
920 return false;
921
922 return true;
923 }
924
ConsumeLiteral()925 Value* JSONParser::ConsumeLiteral() {
926 switch (*pos_) {
927 case 't': {
928 const char kTrueLiteral[] = "true";
929 const int kTrueLen = static_cast<int>(strlen(kTrueLiteral));
930 if (!CanConsume(kTrueLen - 1) ||
931 !StringsAreEqual(pos_, kTrueLiteral, kTrueLen)) {
932 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
933 return nullptr;
934 }
935 NextNChars(kTrueLen - 1);
936 return new FundamentalValue(true);
937 }
938 case 'f': {
939 const char kFalseLiteral[] = "false";
940 const int kFalseLen = static_cast<int>(strlen(kFalseLiteral));
941 if (!CanConsume(kFalseLen - 1) ||
942 !StringsAreEqual(pos_, kFalseLiteral, kFalseLen)) {
943 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
944 return nullptr;
945 }
946 NextNChars(kFalseLen - 1);
947 return new FundamentalValue(false);
948 }
949 case 'n': {
950 const char kNullLiteral[] = "null";
951 const int kNullLen = static_cast<int>(strlen(kNullLiteral));
952 if (!CanConsume(kNullLen - 1) ||
953 !StringsAreEqual(pos_, kNullLiteral, kNullLen)) {
954 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
955 return nullptr;
956 }
957 NextNChars(kNullLen - 1);
958 return Value::CreateNullValue().release();
959 }
960 default:
961 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
962 return nullptr;
963 }
964 }
965
966 // static
StringsAreEqual(const char * one,const char * two,size_t len)967 bool JSONParser::StringsAreEqual(const char* one, const char* two, size_t len) {
968 return strncmp(one, two, len) == 0;
969 }
970
ReportError(JSONReader::JsonParseError code,int column_adjust)971 void JSONParser::ReportError(JSONReader::JsonParseError code,
972 int column_adjust) {
973 error_code_ = code;
974 error_line_ = line_number_;
975 error_column_ = index_ - index_last_line_ + column_adjust;
976 }
977
978 // static
FormatErrorMessage(int line,int column,const std::string & description)979 std::string JSONParser::FormatErrorMessage(int line, int column,
980 const std::string& description) {
981 if (line || column) {
982 return StringPrintf("Line: %i, column: %i, %s",
983 line, column, description.c_str());
984 }
985 return description;
986 }
987
988 } // namespace internal
989 } // namespace base
990