// Copyright 2012 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// A JSON parser, converting from a base::StringPiece to a base::Value.
//
// The JSON spec is:
// https://tools.ietf.org/rfc/rfc8259.txt
// which obsoletes the earlier RFCs 4627, 7158 and 7159.
//
// This RFC should be equivalent to the informal spec:
// https://www.json.org/json-en.html
//
// Implementation choices permitted by the RFC:
// - Nesting is limited (to a configurable depth, 200 by default).
// - Numbers are limited to those representable by a finite double. The
//   conversion from a JSON number (in the base::StringPiece input) to a
//   double-flavored base::Value may also be lossy.
// - The input (which must be UTF-8) may begin with a BOM (Byte Order Mark).
// - Duplicate object keys (strings) are silently allowed. Last key-value pair
//   wins. Previous pairs are discarded.
//
// Configurable (see the JSONParserOptions type) deviations from the RFC:
// - Allow trailing commas: "[1,2,]".
// - Replace invalid Unicode with U+FFFD REPLACEMENT CHARACTER.
// - Allow "// etc\n" and "/* etc */" C-style comments.
// - Allow ASCII control characters, including literal (not escaped) NUL bytes
//   and new lines, within a JSON string.
// - Allow "\\v" escapes within a JSON string, producing a vertical tab.
// - Allow "\\x23" escapes within a JSON string. Subtly, the 2-digit hex value
//   is a Unicode code point, not a UTF-8 byte. For example, "\\xFF" in the
//   JSON source decodes to a base::Value whose string contains "\xC3\xBF", the
//   UTF-8 encoding of U+00FF LATIN SMALL LETTER Y WITH DIAERESIS. Converting
//   from UTF-8 to UTF-16, e.g. via UTF8ToWide, will recover a 16-bit 0x00FF.
35 36 #ifndef BASE_JSON_JSON_READER_H_ 37 #define BASE_JSON_JSON_READER_H_ 38 39 #include <string> 40 41 #include "base/base_export.h" 42 #include "base/json/json_common.h" 43 #include "base/strings/string_number_conversions.h" 44 #include "base/strings/string_piece.h" 45 #include "base/types/expected.h" 46 #include "base/values.h" 47 #include "third_party/abseil-cpp/absl/types/optional.h" 48 49 namespace base { 50 51 enum JSONParserOptions { 52 // Parses the input strictly according to RFC 8259. 53 JSON_PARSE_RFC = 0, 54 55 // Allows commas to exist after the last element in structures. 56 JSON_ALLOW_TRAILING_COMMAS = 1 << 0, 57 58 // If set the parser replaces invalid code points (i.e. lone 59 // surrogates) with the Unicode replacement character (U+FFFD). If 60 // not set, invalid code points trigger a hard error and parsing 61 // fails. 62 JSON_REPLACE_INVALID_CHARACTERS = 1 << 1, 63 64 // Allows both C (/* */) and C++ (//) style comments. 65 JSON_ALLOW_COMMENTS = 1 << 2, 66 67 // Permits unescaped ASCII control characters (such as unescaped \r and \n) 68 // in the range [0x00,0x1F]. 69 JSON_ALLOW_CONTROL_CHARS = 1 << 3, 70 71 // Permits \\v vertical tab escapes. 72 JSON_ALLOW_VERT_TAB = 1 << 4, 73 74 // Permits \\xNN escapes as described above. 75 JSON_ALLOW_X_ESCAPES = 1 << 5, 76 77 // This parser historically accepted, without configuration flags, 78 // non-standard JSON extensions. This flag enables that traditional parsing 79 // behavior. 80 // 81 // This set of options is mirrored in Rust 82 // base::JsonOptions::with_chromium_extensions(). 
83 JSON_PARSE_CHROMIUM_EXTENSIONS = JSON_ALLOW_COMMENTS | 84 JSON_ALLOW_CONTROL_CHARS | 85 JSON_ALLOW_VERT_TAB | JSON_ALLOW_X_ESCAPES, 86 }; 87 88 class BASE_EXPORT JSONReader { 89 public: 90 struct BASE_EXPORT Error { 91 std::string message; 92 int line = 0; 93 int column = 0; 94 ToStringError95 std::string ToString() const { 96 return "line " + base::NumberToString(line) + ", column " + 97 base::NumberToString(column) + ": " + message; 98 } 99 }; 100 101 using Result = base::expected<Value, Error>; 102 103 // This class contains only static methods. 104 JSONReader() = delete; 105 JSONReader(const JSONReader&) = delete; 106 JSONReader& operator=(const JSONReader&) = delete; 107 108 // Reads and parses |json|, returning a Value. 109 // If |json| is not a properly formed JSON string, returns absl::nullopt. 110 static absl::optional<Value> Read( 111 StringPiece json, 112 int options = JSON_PARSE_CHROMIUM_EXTENSIONS, 113 size_t max_depth = internal::kAbsoluteMaxDepth); 114 115 // Reads and parses |json|, returning a Value::Dict. 116 // If |json| is not a properly formed JSON dict string, returns absl::nullopt. 117 static absl::optional<Value::Dict> ReadDict( 118 StringPiece json, 119 int options = JSON_PARSE_CHROMIUM_EXTENSIONS, 120 size_t max_depth = internal::kAbsoluteMaxDepth); 121 122 // Reads and parses |json| like Read(). On success returns a Value as the 123 // expected value. Otherwise, it returns an Error instance, populated with a 124 // formatted error message, an error code, and the error location if 125 // appropriate as the error value of the expected type. 126 static Result ReadAndReturnValueWithError( 127 StringPiece json, 128 int options = JSON_PARSE_CHROMIUM_EXTENSIONS); 129 130 // Determine whether the Rust parser is in use. 131 static bool UsingRust(); 132 }; 133 134 } // namespace base 135 136 #endif // BASE_JSON_JSON_READER_H_ 137