• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2012 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifdef UNSAFE_BUFFERS_BUILD
6 // TODO(crbug.com/40284755): Remove this and spanify to fix the errors.
7 #pragma allow_unsafe_buffers
8 #endif
9 
10 #include "base/json/json_parser.h"
11 
12 #include <cmath>
13 #include <iterator>
14 #include <string_view>
15 #include <utility>
16 #include <vector>
17 
18 #include "base/check_op.h"
19 #include "base/feature_list.h"
20 #include "base/features.h"
21 #include "base/json/json_reader.h"
22 #include "base/metrics/histogram_functions.h"
23 #include "base/notreached.h"
24 #include "base/numerics/safe_conversions.h"
25 #include "base/ranges/algorithm.h"
26 #include "base/strings/string_number_conversions.h"
27 #include "base/strings/string_util.h"
28 #include "base/strings/stringprintf.h"
29 #include "base/strings/utf_string_conversion_utils.h"
30 #include "base/strings/utf_string_conversions.h"
31 #include "base/third_party/icu/icu_utf.h"
32 
33 namespace base {
34 namespace internal {
35 
36 namespace {
37 
38 // Values 1000 and above are used by JSONFileValueSerializer::JsonFileError.
39 static_assert(JSONParser::JSON_PARSE_ERROR_COUNT < 1000,
40               "JSONParser error out of bounds");
41 
ErrorCodeToString(JSONParser::JsonParseError error_code)42 std::string ErrorCodeToString(JSONParser::JsonParseError error_code) {
43   switch (error_code) {
44     case JSONParser::JSON_NO_ERROR:
45       return std::string();
46     case JSONParser::JSON_SYNTAX_ERROR:
47       return JSONParser::kSyntaxError;
48     case JSONParser::JSON_INVALID_ESCAPE:
49       return JSONParser::kInvalidEscape;
50     case JSONParser::JSON_UNEXPECTED_TOKEN:
51       return JSONParser::kUnexpectedToken;
52     case JSONParser::JSON_TRAILING_COMMA:
53       return JSONParser::kTrailingComma;
54     case JSONParser::JSON_TOO_MUCH_NESTING:
55       return JSONParser::kTooMuchNesting;
56     case JSONParser::JSON_UNEXPECTED_DATA_AFTER_ROOT:
57       return JSONParser::kUnexpectedDataAfterRoot;
58     case JSONParser::JSON_UNSUPPORTED_ENCODING:
59       return JSONParser::kUnsupportedEncoding;
60     case JSONParser::JSON_UNQUOTED_DICTIONARY_KEY:
61       return JSONParser::kUnquotedDictionaryKey;
62     case JSONParser::JSON_UNREPRESENTABLE_NUMBER:
63       return JSONParser::kUnrepresentableNumber;
64     case JSONParser::JSON_PARSE_ERROR_COUNT:
65       NOTREACHED();
66   }
67   NOTREACHED();
68 }
69 
70 const int32_t kExtendedASCIIStart = 0x80;
71 constexpr base_icu::UChar32 kUnicodeReplacementPoint = 0xFFFD;
72 
73 // UnprefixedHexStringToInt acts like |HexStringToInt|, but enforces that the
74 // input consists purely of hex digits. I.e. no "0x" nor "OX" prefix is
75 // permitted.
UnprefixedHexStringToInt(std::string_view input,int * output)76 bool UnprefixedHexStringToInt(std::string_view input, int* output) {
77   for (size_t i = 0; i < input.size(); i++) {
78     if (!IsHexDigit(input[i])) {
79       return false;
80     }
81   }
82   return HexStringToInt(input, output);
83 }
84 
// Non-standard JSON features recorded in the extension-usage histogram.
//
// These values are persisted to logs. Entries should not be renumbered and
// numeric values should never be reused. The values are spelled out
// explicitly so that an accidental reordering cannot silently renumber them.
enum class ChromiumJsonExtension {
  kCComment = 0,
  kCppComment = 1,
  kXEscape = 2,
  kVerticalTabEscape = 3,
  kControlCharacter = 4,
  kNewlineInString = 5,
  kMaxValue = kNewlineInString,
};
96 
// Histogram that receives one ChromiumJsonExtension sample each time the
// parser encounters a non-standard construct.
constexpr char kExtensionHistogramName[] =
    "Security.JSONParser.ChromiumExtensionUsage";
99 
100 }  // namespace
101 
102 // This is U+FFFD.
103 const char kUnicodeReplacementString[] = "\xEF\xBF\xBD";
104 
105 const char JSONParser::kSyntaxError[] = "Syntax error.";
106 const char JSONParser::kInvalidEscape[] = "Invalid escape sequence.";
107 const char JSONParser::kUnexpectedToken[] = "Unexpected token.";
108 const char JSONParser::kTrailingComma[] = "Trailing comma not allowed.";
109 const char JSONParser::kTooMuchNesting[] = "Too much nesting.";
110 const char JSONParser::kUnexpectedDataAfterRoot[] =
111     "Unexpected data after root element.";
112 const char JSONParser::kUnsupportedEncoding[] =
113     "Unsupported encoding. JSON must be UTF-8.";
114 const char JSONParser::kUnquotedDictionaryKey[] =
115     "Dictionary keys must be quoted.";
116 const char JSONParser::kUnrepresentableNumber[] =
117     "Number cannot be represented.";
118 
JSONParser(int options,size_t max_depth)119 JSONParser::JSONParser(int options, size_t max_depth)
120     : options_(options),
121       max_depth_(max_depth),
122       index_(0),
123       stack_depth_(0),
124       line_number_(0),
125       index_last_line_(0),
126       error_code_(JSON_NO_ERROR),
127       error_line_(0),
128       error_column_(0) {
129   CHECK_LE(max_depth, kAbsoluteMaxDepth);
130 }
131 
132 JSONParser::~JSONParser() = default;
133 
Parse(std::string_view input)134 std::optional<Value> JSONParser::Parse(std::string_view input) {
135   input_ = input;
136   index_ = 0;
137   // Line and column counting is 1-based, but |index_| is 0-based. For example,
138   // if input is "Aaa\nB" then 'A' and 'B' are both in column 1 (at lines 1 and
139   // 2) and have indexes of 0 and 4. We track the line number explicitly (the
140   // |line_number_| field) and the column number implicitly (the difference
141   // between |index_| and |index_last_line_|). In calculating that difference,
142   // |index_last_line_| is the index of the '\r' or '\n', not the index of the
143   // first byte after the '\n'. For the 'B' in "Aaa\nB", its |index_| and
144   // |index_last_line_| would be 4 and 3: 'B' is in column (4 - 3) = 1. We
145   // initialize |index_last_line_| to -1, not 0, since -1 is the (out of range)
146   // index of the imaginary '\n' immediately before the start of the string:
147   // 'A' is in column (0 - -1) = 1.
148   line_number_ = 1;
149   index_last_line_ = static_cast<size_t>(-1);
150 
151   error_code_ = JSON_NO_ERROR;
152   error_line_ = 0;
153   error_column_ = 0;
154 
155   // When the input JSON string starts with a UTF-8 Byte-Order-Mark,
156   // advance the start position to avoid the ParseNextToken function mis-
157   // treating a Unicode BOM as an invalid character and returning NULL.
158   ConsumeIfMatch("\xEF\xBB\xBF");
159 
160   // Parse the first and any nested tokens.
161   std::optional<Value> root(ParseNextToken());
162   if (!root)
163     return std::nullopt;
164 
165   // Make sure the input stream is at an end.
166   if (GetNextToken() != T_END_OF_INPUT) {
167     ReportError(JSON_UNEXPECTED_DATA_AFTER_ROOT, 0);
168     return std::nullopt;
169   }
170 
171   return root;
172 }
173 
error_code() const174 JSONParser::JsonParseError JSONParser::error_code() const {
175   return error_code_;
176 }
177 
GetErrorMessage() const178 std::string JSONParser::GetErrorMessage() const {
179   return FormatErrorMessage(error_line_, error_column_,
180                             ErrorCodeToString(error_code_));
181 }
182 
error_line() const183 int JSONParser::error_line() const {
184   return error_line_;
185 }
186 
error_column() const187 int JSONParser::error_column() const {
188   return error_column_;
189 }
190 
191 // JSONParser private //////////////////////////////////////////////////////////
192 
PeekChars(size_t count)193 std::optional<std::string_view> JSONParser::PeekChars(size_t count) {
194   if (count > input_.length() - index_) {
195     return std::nullopt;
196   }
197   // Using string_view::substr() was historically significantly slower
198   // (according to base_perftests) than constructing a substring manually.
199   //
200   // TODO(crbug.com/40284755): Is this still the case? Ideally the bounds check
201   // performed by substr would be deleted by the optimizer for being redundant
202   // with the runtime check above. However, to do so, the compiler would need
203   // to know `index_ <= input_.length()` is a class invariant. If we
204   // restructured the code so that we only stored the remaining data, that
205   // would avoid this, but it would prevent rewinding (the places in this file
206   // which look at `input_[index_ - 1]`.)
207   return std::string_view(input_.data() + index_, count);
208 }
209 
PeekChar()210 std::optional<char> JSONParser::PeekChar() {
211   std::optional<std::string_view> chars = PeekChars(1);
212   if (chars)
213     return (*chars)[0];
214   return std::nullopt;
215 }
216 
ConsumeChars(size_t count)217 std::optional<std::string_view> JSONParser::ConsumeChars(size_t count) {
218   std::optional<std::string_view> chars = PeekChars(count);
219   if (chars)
220     index_ += count;
221   return chars;
222 }
223 
ConsumeChar()224 std::optional<char> JSONParser::ConsumeChar() {
225   std::optional<std::string_view> chars = ConsumeChars(1);
226   if (chars)
227     return (*chars)[0];
228   return std::nullopt;
229 }
230 
pos()231 const char* JSONParser::pos() {
232   CHECK_LE(index_, input_.length());
233   return input_.data() + index_;
234 }
235 
GetNextToken()236 JSONParser::Token JSONParser::GetNextToken() {
237   EatWhitespaceAndComments();
238 
239   std::optional<char> c = PeekChar();
240   if (!c)
241     return T_END_OF_INPUT;
242 
243   switch (*c) {
244     case '{':
245       return T_OBJECT_BEGIN;
246     case '}':
247       return T_OBJECT_END;
248     case '[':
249       return T_ARRAY_BEGIN;
250     case ']':
251       return T_ARRAY_END;
252     case '"':
253       return T_STRING;
254     case '0':
255     case '1':
256     case '2':
257     case '3':
258     case '4':
259     case '5':
260     case '6':
261     case '7':
262     case '8':
263     case '9':
264     case '-':
265       return T_NUMBER;
266     case 't':
267       return T_BOOL_TRUE;
268     case 'f':
269       return T_BOOL_FALSE;
270     case 'n':
271       return T_NULL;
272     case ',':
273       return T_LIST_SEPARATOR;
274     case ':':
275       return T_OBJECT_PAIR_SEPARATOR;
276     default:
277       return T_INVALID_TOKEN;
278   }
279 }
280 
EatWhitespaceAndComments()281 void JSONParser::EatWhitespaceAndComments() {
282   while (std::optional<char> c = PeekChar()) {
283     switch (*c) {
284       case '\r':
285       case '\n':
286         index_last_line_ = index_;
287         // Don't increment line_number_ twice for "\r\n".
288         if (!(c == '\n' && index_ > 0 && input_[index_ - 1] == '\r')) {
289           ++line_number_;
290         }
291         [[fallthrough]];
292       case ' ':
293       case '\t':
294         ConsumeChar();
295         break;
296       case '/':
297         if (!EatComment())
298           return;
299         break;
300       default:
301         return;
302     }
303   }
304 }
305 
EatComment()306 bool JSONParser::EatComment() {
307   std::optional<std::string_view> comment_start = PeekChars(2);
308   if (!comment_start)
309     return false;
310 
311   const bool comments_allowed = options_ & JSON_ALLOW_COMMENTS;
312 
313   if (comment_start == "//") {
314     UmaHistogramEnumeration(kExtensionHistogramName,
315                             ChromiumJsonExtension::kCppComment);
316     if (!comments_allowed) {
317       ReportError(JSON_UNEXPECTED_TOKEN, 0);
318       return false;
319     }
320 
321     ConsumeChars(2);
322     // Single line comment, read to newline.
323     while (std::optional<char> c = PeekChar()) {
324       if (c == '\n' || c == '\r')
325         return true;
326       ConsumeChar();
327     }
328   } else if (comment_start == "/*") {
329     UmaHistogramEnumeration(kExtensionHistogramName,
330                             ChromiumJsonExtension::kCComment);
331     if (!comments_allowed) {
332       ReportError(JSON_UNEXPECTED_TOKEN, 0);
333       return false;
334     }
335 
336     ConsumeChars(2);
337     char previous_char = '\0';
338     // Block comment, read until end marker.
339     while (std::optional<char> c = PeekChar()) {
340       if (previous_char == '*' && c == '/') {
341         // EatWhitespaceAndComments will inspect pos(), which will still be on
342         // the last / of the comment, so advance once more (which may also be
343         // end of input).
344         ConsumeChar();
345         return true;
346       }
347       previous_char = *ConsumeChar();
348     }
349 
350     // If the comment is unterminated, GetNextToken will report T_END_OF_INPUT.
351   }
352 
353   return false;
354 }
355 
ParseNextToken()356 std::optional<Value> JSONParser::ParseNextToken() {
357   return ParseToken(GetNextToken());
358 }
359 
ParseToken(Token token)360 std::optional<Value> JSONParser::ParseToken(Token token) {
361   switch (token) {
362     case T_OBJECT_BEGIN:
363       return ConsumeDictionary();
364     case T_ARRAY_BEGIN:
365       return ConsumeList();
366     case T_STRING:
367       return ConsumeString();
368     case T_NUMBER:
369       return ConsumeNumber();
370     case T_BOOL_TRUE:
371     case T_BOOL_FALSE:
372     case T_NULL:
373       return ConsumeLiteral();
374     default:
375       ReportError(JSON_UNEXPECTED_TOKEN, 0);
376       return std::nullopt;
377   }
378 }
379 
ConsumeDictionary()380 std::optional<Value> JSONParser::ConsumeDictionary() {
381   if (ConsumeChar() != '{') {
382     ReportError(JSON_UNEXPECTED_TOKEN, 0);
383     return std::nullopt;
384   }
385 
386   StackMarker depth_check(max_depth_, &stack_depth_);
387   if (depth_check.IsTooDeep()) {
388     ReportError(JSON_TOO_MUCH_NESTING, -1);
389     return std::nullopt;
390   }
391 
392   std::vector<std::pair<std::string, Value>> values;
393 
394   Token token = GetNextToken();
395   while (token != T_OBJECT_END) {
396     if (token != T_STRING) {
397       ReportError(JSON_UNQUOTED_DICTIONARY_KEY, 0);
398       return std::nullopt;
399     }
400 
401     // First consume the key.
402     std::optional<std::string> key = ConsumeStringRaw();
403     if (!key) {
404       return std::nullopt;
405     }
406 
407     // Read the separator.
408     token = GetNextToken();
409     if (token != T_OBJECT_PAIR_SEPARATOR) {
410       ReportError(JSON_SYNTAX_ERROR, 0);
411       return std::nullopt;
412     }
413 
414     // The next token is the value. Ownership transfers to |dict|.
415     ConsumeChar();
416     std::optional<Value> value = ParseNextToken();
417     if (!value) {
418       // ReportError from deeper level.
419       return std::nullopt;
420     }
421 
422     values.emplace_back(std::move(*key), std::move(*value));
423 
424     token = GetNextToken();
425     if (token == T_LIST_SEPARATOR) {
426       ConsumeChar();
427       token = GetNextToken();
428       if (token == T_OBJECT_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
429         ReportError(JSON_TRAILING_COMMA, 0);
430         return std::nullopt;
431       }
432     } else if (token != T_OBJECT_END) {
433       ReportError(JSON_SYNTAX_ERROR, 0);
434       return std::nullopt;
435     }
436   }
437 
438   ConsumeChar();  // Closing '}'.
439   // Reverse |dict_storage| to keep the last of elements with the same key in
440   // the input.
441   ranges::reverse(values);
442   return Value(Value::Dict(std::make_move_iterator(values.begin()),
443                            std::make_move_iterator(values.end())));
444 }
445 
ConsumeList()446 std::optional<Value> JSONParser::ConsumeList() {
447   if (ConsumeChar() != '[') {
448     ReportError(JSON_UNEXPECTED_TOKEN, 0);
449     return std::nullopt;
450   }
451 
452   StackMarker depth_check(max_depth_, &stack_depth_);
453   if (depth_check.IsTooDeep()) {
454     ReportError(JSON_TOO_MUCH_NESTING, -1);
455     return std::nullopt;
456   }
457 
458   Value::List list;
459 
460   Token token = GetNextToken();
461   while (token != T_ARRAY_END) {
462     std::optional<Value> item = ParseToken(token);
463     if (!item) {
464       // ReportError from deeper level.
465       return std::nullopt;
466     }
467 
468     list.Append(std::move(*item));
469 
470     token = GetNextToken();
471     if (token == T_LIST_SEPARATOR) {
472       ConsumeChar();
473       token = GetNextToken();
474       if (token == T_ARRAY_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
475         ReportError(JSON_TRAILING_COMMA, 0);
476         return std::nullopt;
477       }
478     } else if (token != T_ARRAY_END) {
479       ReportError(JSON_SYNTAX_ERROR, 0);
480       return std::nullopt;
481     }
482   }
483 
484   ConsumeChar();  // Closing ']'.
485 
486   return Value(std::move(list));
487 }
488 
ConsumeString()489 std::optional<Value> JSONParser::ConsumeString() {
490   std::optional<std::string> string = ConsumeStringRaw();
491   if (!string) {
492     return std::nullopt;
493   }
494   return Value(std::move(*string));
495 }
496 
ConsumeStringRaw()497 std::optional<std::string> JSONParser::ConsumeStringRaw() {
498   if (ConsumeChar() != '"') {
499     ReportError(JSON_UNEXPECTED_TOKEN, 0);
500     return std::nullopt;
501   }
502 
503   std::string string;
504   for (;;) {
505     auto [result, consumed] = ConsumeStringPart();
506     switch (result) {
507       case StringResult::kError:
508         return std::nullopt;
509 
510       case StringResult::kDone:
511         // This is the last time we're appending, so pre-reserve the desired
512         // size, to prevent `+=` from overallocating. (In other cases, the
513         // overallocating is desirable for amortization.) In particular,
514         // the common case is that `string` is empty and we return in one step.
515         string.reserve(string.size() + consumed.size());
516         string += consumed;
517         return std::move(string);
518 
519       case StringResult::kReplacementCharacter:
520         string += consumed;
521         string += kUnicodeReplacementString;
522         break;  // Keep parsing.
523 
524       case StringResult::kEscape:
525         string += consumed;
526         std::optional<char> escape_char = ConsumeChar();
527         if (!escape_char) {
528           ReportError(JSON_INVALID_ESCAPE, -1);
529           return std::nullopt;
530         }
531 
532         switch (*escape_char) {
533           // Allowed esape sequences:
534           case 'x': {  // UTF-8 sequence.
535             // UTF-8 \x escape sequences are not allowed in the spec, but they
536             // are supported here for backwards-compatiblity with the old
537             // parser.
538             UmaHistogramEnumeration(kExtensionHistogramName,
539                                     ChromiumJsonExtension::kXEscape);
540             if (!(options_ & JSON_ALLOW_X_ESCAPES)) {
541               ReportError(JSON_INVALID_ESCAPE, -1);
542               return std::nullopt;
543             }
544 
545             std::optional<std::string_view> escape_sequence = ConsumeChars(2);
546             if (!escape_sequence) {
547               ReportError(JSON_INVALID_ESCAPE, -3);
548               return std::nullopt;
549             }
550 
551             int hex_digit = 0;
552             if (!UnprefixedHexStringToInt(*escape_sequence, &hex_digit)) {
553               ReportError(JSON_INVALID_ESCAPE, -3);
554               return std::nullopt;
555             }
556 
557             // A two-character hex sequence is at most 0xff and all codepoints
558             // up to 0xff are valid.
559             DCHECK_LE(hex_digit, 0xff);
560             DCHECK(IsValidCharacter(hex_digit));
561             WriteUnicodeCharacter(hex_digit, &string);
562             break;
563           }
564           case 'u': {  // UTF-16 sequence.
565             // UTF units are of the form \uXXXX.
566             base_icu::UChar32 code_point;
567             if (!DecodeUTF16(&code_point)) {
568               ReportError(JSON_INVALID_ESCAPE, -1);
569               return std::nullopt;
570             }
571             WriteUnicodeCharacter(code_point, &string);
572             break;
573           }
574           case '"':
575             string.push_back('"');
576             break;
577           case '\\':
578             string.push_back('\\');
579             break;
580           case '/':
581             string.push_back('/');
582             break;
583           case 'b':
584             string.push_back('\b');
585             break;
586           case 'f':
587             string.push_back('\f');
588             break;
589           case 'n':
590             string.push_back('\n');
591             break;
592           case 'r':
593             string.push_back('\r');
594             break;
595           case 't':
596             string.push_back('\t');
597             break;
598           case 'v':  // Not listed as valid escape sequence in the RFC.
599             UmaHistogramEnumeration(kExtensionHistogramName,
600                                     ChromiumJsonExtension::kVerticalTabEscape);
601             if (!(options_ & JSON_ALLOW_VERT_TAB)) {
602               ReportError(JSON_INVALID_ESCAPE, -1);
603               return std::nullopt;
604             }
605             string.push_back('\v');
606             break;
607           // All other escape squences are illegal.
608           default:
609             ReportError(JSON_INVALID_ESCAPE, -1);
610             return std::nullopt;
611         }
612         break;  // Keep parsing.
613     }
614   }
615 }
616 
617 std::pair<JSONParser::StringResult, std::string_view>
ConsumeStringPart()618 JSONParser::ConsumeStringPart() {
619   const size_t start_index = index_;
620   while (std::optional<char> c = PeekChar()) {
621     // Handle non-ASCII characters, which never trigger any special handling
622     // beyond needing to be valid UTF-8. ASCII characters will be handled
623     // separately below.
624     if (static_cast<unsigned char>(*c) >= kExtendedASCIIStart) {
625       base_icu::UChar32 next_char = 0;
626       size_t last_index = index_;
627       if (!ReadUnicodeCharacter(input_.data(), input_.length(), &index_,
628                                 &next_char)) {
629         if ((options_ & JSON_REPLACE_INVALID_CHARACTERS) == 0) {
630           ReportError(JSON_UNSUPPORTED_ENCODING, 0);
631           // No need to return consumed data.
632           return {StringResult::kError, {}};
633         }
634         ConsumeChar();
635         return {StringResult::kReplacementCharacter,
636                 input_.substr(start_index, last_index - start_index)};
637       }
638 
639       // Valid UTF-8 will be copied as-is into the output, so keep processing.
640       DCHECK_GE(next_char, kExtendedASCIIStart);
641       ConsumeChar();
642       continue;
643     }
644 
645     if (*c == '"') {
646       std::string_view ret = input_.substr(start_index, index_ - start_index);
647       ConsumeChar();
648       return {StringResult::kDone, ret};
649     }
650     if (*c == '\\') {
651       std::string_view ret = input_.substr(start_index, index_ - start_index);
652       ConsumeChar();
653       return {StringResult::kEscape, ret};
654     }
655 
656     // Per Section 7, "All Unicode characters may be placed within the
657     // quotation marks, except for the characters that MUST be escaped:
658     // quotation mark, reverse solidus, and the control characters (U+0000
659     // through U+001F)".
660     if (*c == '\n' || *c == '\r') {
661       UmaHistogramEnumeration(kExtensionHistogramName,
662                               ChromiumJsonExtension::kNewlineInString);
663       if (!(options_ &
664             (JSON_ALLOW_NEWLINES_IN_STRINGS | JSON_ALLOW_CONTROL_CHARS))) {
665         ReportError(JSON_UNSUPPORTED_ENCODING, -1);
666         return {StringResult::kError, {}};  // No need to return consumed data.
667       }
668     } else if (*c <= 0x1F) {
669       UmaHistogramEnumeration(kExtensionHistogramName,
670                               ChromiumJsonExtension::kControlCharacter);
671       if (!(options_ & JSON_ALLOW_CONTROL_CHARS)) {
672         ReportError(JSON_UNSUPPORTED_ENCODING, -1);
673         return {StringResult::kError, {}};  // No need to return consumed data.
674       }
675     }
676 
677     // If this character is not an escape sequence, track any line breaks and
678     // keep parsing. The JSON spec forbids unescaped ASCII control characters
679     // within a string, including '\r' and '\n', but this implementation is more
680     // lenient.
681     if (*c == '\r' || *c == '\n') {
682       index_last_line_ = index_;
683       // Don't increment line_number_ twice for "\r\n". We are guaranteed that
684       // (index_ > 0) because we are consuming a string, so we must have seen an
685       // opening '"' quote character.
686       if ((*c == '\r') || (input_[index_ - 1] != '\r')) {
687         ++line_number_;
688       }
689     }
690     ConsumeChar();
691   }
692 
693   ReportError(JSON_SYNTAX_ERROR, -1);
694   return {StringResult::kError, {}};  // No need to return consumed data.
695 }
696 
697 // Entry is at the first X in \uXXXX.
DecodeUTF16(base_icu::UChar32 * out_code_point)698 bool JSONParser::DecodeUTF16(base_icu::UChar32* out_code_point) {
699   std::optional<std::string_view> escape_sequence = ConsumeChars(4);
700   if (!escape_sequence)
701     return false;
702 
703   // Consume the UTF-16 code unit, which may be a high surrogate.
704   int code_unit16_high = 0;
705   if (!UnprefixedHexStringToInt(*escape_sequence, &code_unit16_high))
706     return false;
707 
708   // If this is a high surrogate, consume the next code unit to get the
709   // low surrogate.
710   if (CBU16_IS_SURROGATE(code_unit16_high)) {
711     // Make sure this is the high surrogate.
712     if (!CBU16_IS_SURROGATE_LEAD(code_unit16_high)) {
713       if ((options_ & JSON_REPLACE_INVALID_CHARACTERS) == 0)
714         return false;
715       *out_code_point = kUnicodeReplacementPoint;
716       return true;
717     }
718 
719     // Make sure that the token has more characters to consume the
720     // lower surrogate.
721     if (!ConsumeIfMatch("\\u")) {
722       if ((options_ & JSON_REPLACE_INVALID_CHARACTERS) == 0)
723         return false;
724       *out_code_point = kUnicodeReplacementPoint;
725       return true;
726     }
727 
728     escape_sequence = ConsumeChars(4);
729     if (!escape_sequence)
730       return false;
731 
732     int code_unit16_low = 0;
733     if (!UnprefixedHexStringToInt(*escape_sequence, &code_unit16_low))
734       return false;
735 
736     if (!CBU16_IS_TRAIL(code_unit16_low)) {
737       if ((options_ & JSON_REPLACE_INVALID_CHARACTERS) == 0)
738         return false;
739       *out_code_point = kUnicodeReplacementPoint;
740       return true;
741     }
742 
743     base_icu::UChar32 code_point =
744         CBU16_GET_SUPPLEMENTARY(code_unit16_high, code_unit16_low);
745 
746     *out_code_point = code_point;
747   } else {
748     // Not a surrogate.
749     DCHECK(CBU16_IS_SINGLE(code_unit16_high));
750 
751     *out_code_point = code_unit16_high;
752   }
753 
754   return true;
755 }
756 
ConsumeNumber()757 std::optional<Value> JSONParser::ConsumeNumber() {
758   const char* num_start = pos();
759   const size_t start_index = index_;
760   size_t end_index = start_index;
761 
762   if (PeekChar() == '-')
763     ConsumeChar();
764 
765   if (!ReadInt(false)) {
766     ReportError(JSON_SYNTAX_ERROR, 0);
767     return std::nullopt;
768   }
769   end_index = index_;
770 
771   // The optional fraction part.
772   if (PeekChar() == '.') {
773     ConsumeChar();
774     if (!ReadInt(true)) {
775       ReportError(JSON_SYNTAX_ERROR, 0);
776       return std::nullopt;
777     }
778     end_index = index_;
779   }
780 
781   // Optional exponent part.
782   std::optional<char> c = PeekChar();
783   if (c == 'e' || c == 'E') {
784     ConsumeChar();
785     if (PeekChar() == '-' || PeekChar() == '+') {
786       ConsumeChar();
787     }
788     if (!ReadInt(true)) {
789       ReportError(JSON_SYNTAX_ERROR, 0);
790       return std::nullopt;
791     }
792     end_index = index_;
793   }
794 
795   std::string_view num_string(num_start, end_index - start_index);
796 
797   int num_int;
798   if (StringToInt(num_string, &num_int)) {
799     // StringToInt will treat `-0` as zero, losing the significance of the
800     // negation.
801     if (num_int == 0 && num_string.starts_with('-')) {
802       return Value(-0.0);
803     }
804     return Value(num_int);
805   }
806 
807   double num_double;
808   if (StringToDouble(num_string, &num_double) && std::isfinite(num_double)) {
809     return Value(num_double);
810   }
811 
812   ReportError(JSON_UNREPRESENTABLE_NUMBER, 0);
813   return std::nullopt;
814 }
815 
ReadInt(bool allow_leading_zeros)816 bool JSONParser::ReadInt(bool allow_leading_zeros) {
817   size_t len = 0;
818   char first = 0;
819 
820   while (std::optional<char> c = PeekChar()) {
821     if (!IsAsciiDigit(*c)) {
822       break;
823     }
824 
825     if (len == 0)
826       first = *c;
827 
828     ++len;
829     ConsumeChar();
830   }
831 
832   if (len == 0)
833     return false;
834 
835   if (!allow_leading_zeros && len > 1 && first == '0')
836     return false;
837 
838   return true;
839 }
840 
ConsumeLiteral()841 std::optional<Value> JSONParser::ConsumeLiteral() {
842   if (ConsumeIfMatch("true"))
843     return Value(true);
844   if (ConsumeIfMatch("false"))
845     return Value(false);
846   if (ConsumeIfMatch("null"))
847     return Value(Value::Type::NONE);
848   ReportError(JSON_SYNTAX_ERROR, 0);
849   return std::nullopt;
850 }
851 
ConsumeIfMatch(std::string_view match)852 bool JSONParser::ConsumeIfMatch(std::string_view match) {
853   if (match == PeekChars(match.size())) {
854     ConsumeChars(match.size());
855     return true;
856   }
857   return false;
858 }
859 
ReportError(JsonParseError code,int column_adjust)860 void JSONParser::ReportError(JsonParseError code, int column_adjust) {
861   error_code_ = code;
862   error_line_ = line_number_;
863   error_column_ = static_cast<int>(index_ - index_last_line_) + column_adjust;
864 
865   // For a final blank line ('\n' and then EOF), a negative column_adjust may
866   // put us below 1, which doesn't really make sense for 1-based columns.
867   if (error_column_ < 1) {
868     error_column_ = 1;
869   }
870 }
871 
872 // static
FormatErrorMessage(int line,int column,const std::string & description)873 std::string JSONParser::FormatErrorMessage(int line, int column,
874                                            const std::string& description) {
875   if (line || column) {
876     return StringPrintf("Line: %i, column: %i, %s",
877         line, column, description.c_str());
878   }
879   return description;
880 }
881 
882 }  // namespace internal
883 }  // namespace base
884