1 // Copyright 2011 the V8 project authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef V8_JSON_JSON_PARSER_H_ 6 #define V8_JSON_JSON_PARSER_H_ 7 8 #include "include/v8-callbacks.h" 9 #include "src/base/small-vector.h" 10 #include "src/base/strings.h" 11 #include "src/common/high-allocation-throughput-scope.h" 12 #include "src/execution/isolate.h" 13 #include "src/heap/factory.h" 14 #include "src/objects/objects.h" 15 #include "src/zone/zone-containers.h" 16 17 namespace v8 { 18 namespace internal { 19 20 enum ParseElementResult { kElementFound, kElementNotFound }; 21 22 class JsonString final { 23 public: JsonString()24 JsonString() 25 : start_(0), 26 length_(0), 27 needs_conversion_(false), 28 internalize_(false), 29 has_escape_(false), 30 is_index_(false) {} 31 JsonString(uint32_t index)32 explicit JsonString(uint32_t index) 33 : index_(index), 34 length_(0), 35 needs_conversion_(false), 36 internalize_(false), 37 has_escape_(false), 38 is_index_(true) {} 39 JsonString(int start,int length,bool needs_conversion,bool needs_internalization,bool has_escape)40 JsonString(int start, int length, bool needs_conversion, 41 bool needs_internalization, bool has_escape) 42 : start_(start), 43 length_(length), 44 needs_conversion_(needs_conversion), 45 internalize_(needs_internalization || 46 length_ <= kMaxInternalizedStringValueLength), 47 has_escape_(has_escape), 48 is_index_(false) {} 49 internalize()50 bool internalize() const { 51 DCHECK(!is_index_); 52 return internalize_; 53 } 54 needs_conversion()55 bool needs_conversion() const { 56 DCHECK(!is_index_); 57 return needs_conversion_; 58 } 59 has_escape()60 bool has_escape() const { 61 DCHECK(!is_index_); 62 return has_escape_; 63 } 64 start()65 int start() const { 66 DCHECK(!is_index_); 67 return start_; 68 } 69 length()70 int length() const { 71 DCHECK(!is_index_); 72 return length_; 73 } 74 index()75 uint32_t index() const { 76 DCHECK(is_index_); 77 return index_; 78 } 79 is_index()80 bool is_index() const { return is_index_; } 81 82 private: 83 static const int kMaxInternalizedStringValueLength = 10; 84 85 union { 86 const int start_; 87 const uint32_t index_; 88 }; 89 const int length_; 90 const bool needs_conversion_ : 1; 91 const bool internalize_ : 1; 92 const bool has_escape_ : 1; 93 const bool is_index_ : 1; 94 }; 95 96 struct JsonProperty { JsonPropertyJsonProperty97 JsonProperty() { UNREACHABLE(); } JsonPropertyJsonProperty98 explicit JsonProperty(const JsonString& string) : string(string) {} 99 100 JsonString string; 101 Handle<Object> value; 102 }; 103 104 class JsonParseInternalizer { 105 public: 106 static MaybeHandle<Object> Internalize(Isolate* isolate, 107 Handle<Object> object, 108 Handle<Object> reviver); 109 110 private: JsonParseInternalizer(Isolate * isolate,Handle<JSReceiver> reviver)111 JsonParseInternalizer(Isolate* isolate, Handle<JSReceiver> reviver) 112 : isolate_(isolate), reviver_(reviver) {} 113 114 MaybeHandle<Object> InternalizeJsonProperty(Handle<JSReceiver> holder, 115 Handle<String> key); 116 117 bool RecurseAndApply(Handle<JSReceiver> holder, Handle<String> name); 118 119 Isolate* isolate_; 120 Handle<JSReceiver> reviver_; 121 }; 122 123 enum class JsonToken : uint8_t { 124 NUMBER, 125 STRING, 126 LBRACE, 127 RBRACE, 128 LBRACK, 129 RBRACK, 130 TRUE_LITERAL, 131 FALSE_LITERAL, 132 NULL_LITERAL, 133 WHITESPACE, 134 COLON, 135 COMMA, 136 ILLEGAL, 137 EOS 138 }; 139 140 // A simple json parser. 141 template <typename Char> 142 class JsonParser final { 143 public: 144 using SeqString = typename CharTraits<Char>::String; 145 using SeqExternalString = typename CharTraits<Char>::ExternalString; 146 Parse(Isolate * isolate,Handle<String> source,Handle<Object> reviver)147 V8_WARN_UNUSED_RESULT static MaybeHandle<Object> Parse( 148 Isolate* isolate, Handle<String> source, Handle<Object> reviver) { 149 HighAllocationThroughputScope high_throughput_scope( 150 V8::GetCurrentPlatform()); 151 Handle<Object> result; 152 ASSIGN_RETURN_ON_EXCEPTION(isolate, result, 153 JsonParser(isolate, source).ParseJson(), Object); 154 if (reviver->IsCallable()) { 155 return JsonParseInternalizer::Internalize(isolate, result, reviver); 156 } 157 return result; 158 } 159 160 static constexpr base::uc32 kEndOfString = static_cast<base::uc32>(-1); 161 static constexpr base::uc32 kInvalidUnicodeCharacter = 162 static_cast<base::uc32>(-1); 163 164 private: 165 template <typename T> 166 using SmallVector = base::SmallVector<T, 16>; 167 struct JsonContinuation { 168 enum Type : uint8_t { kReturn, kObjectProperty, kArrayElement }; JsonContinuationJsonContinuation169 JsonContinuation(Isolate* isolate, Type type, size_t index) 170 : scope(isolate), 171 type_(type), 172 index(static_cast<uint32_t>(index)), 173 max_index(0), 174 elements(0) {} 175 typeJsonContinuation176 Type type() const { return static_cast<Type>(type_); } set_typeJsonContinuation177 void set_type(Type type) { type_ = static_cast<uint8_t>(type); } 178 179 HandleScope scope; 180 // Unfortunately GCC doesn't like packing Type in two bits. 181 uint32_t type_ : 2; 182 uint32_t index : 30; 183 uint32_t max_index; 184 uint32_t elements; 185 }; 186 187 JsonParser(Isolate* isolate, Handle<String> source); 188 ~JsonParser(); 189 190 // Parse a string containing a single JSON value. 191 MaybeHandle<Object> ParseJson(); 192 advance()193 void advance() { ++cursor_; } 194 CurrentCharacter()195 base::uc32 CurrentCharacter() { 196 if (V8_UNLIKELY(is_at_end())) return kEndOfString; 197 return *cursor_; 198 } 199 NextCharacter()200 base::uc32 NextCharacter() { 201 advance(); 202 return CurrentCharacter(); 203 } 204 205 void AdvanceToNonDecimal(); 206 peek()207 V8_INLINE JsonToken peek() const { return next_; } 208 Consume(JsonToken token)209 void Consume(JsonToken token) { 210 DCHECK_EQ(peek(), token); 211 advance(); 212 } 213 Expect(JsonToken token)214 void Expect(JsonToken token) { 215 if (V8_LIKELY(peek() == token)) { 216 advance(); 217 } else { 218 ReportUnexpectedToken(peek()); 219 } 220 } 221 ExpectNext(JsonToken token)222 void ExpectNext(JsonToken token) { 223 SkipWhitespace(); 224 Expect(token); 225 } 226 Check(JsonToken token)227 bool Check(JsonToken token) { 228 SkipWhitespace(); 229 if (next_ != token) return false; 230 advance(); 231 return true; 232 } 233 234 template <size_t N> ScanLiteral(const char (& s)[N])235 void ScanLiteral(const char (&s)[N]) { 236 DCHECK(!is_at_end()); 237 // There's at least 1 character, we always consume a character and compare 238 // the next character. The first character was compared before we jumped 239 // to ScanLiteral. 240 STATIC_ASSERT(N > 2); 241 size_t remaining = static_cast<size_t>(end_ - cursor_); 242 if (V8_LIKELY(remaining >= N - 1 && 243 CompareCharsEqual(s + 1, cursor_ + 1, N - 2))) { 244 cursor_ += N - 1; 245 return; 246 } 247 248 cursor_++; 249 for (size_t i = 0; i < std::min(N - 2, remaining - 1); i++) { 250 if (*(s + 1 + i) != *cursor_) { 251 ReportUnexpectedCharacter(*cursor_); 252 return; 253 } 254 cursor_++; 255 } 256 257 DCHECK(is_at_end()); 258 ReportUnexpectedToken(JsonToken::EOS); 259 } 260 261 // The JSON lexical grammar is specified in the ECMAScript 5 standard, 262 // section 15.12.1.1. The only allowed whitespace characters between tokens 263 // are tab, carriage-return, newline and space. 264 void SkipWhitespace(); 265 266 // A JSON string (production JSONString) is subset of valid JavaScript string 267 // literals. The string must only be double-quoted (not single-quoted), and 268 // the only allowed backslash-escapes are ", /, \, b, f, n, r, t and 269 // four-digit hex escapes (uXXXX). Any other use of backslashes is invalid. 270 JsonString ScanJsonString(bool needs_internalization); 271 JsonString ScanJsonPropertyKey(JsonContinuation* cont); 272 base::uc32 ScanUnicodeCharacter(); 273 Handle<String> MakeString(const JsonString& string, 274 Handle<String> hint = Handle<String>()); 275 276 template <typename SinkChar> 277 void DecodeString(SinkChar* sink, int start, int length); 278 279 template <typename SinkSeqString> 280 Handle<String> DecodeString(const JsonString& string, 281 Handle<SinkSeqString> intermediate, 282 Handle<String> hint); 283 284 // A JSON number (production JSONNumber) is a subset of the valid JavaScript 285 // decimal number literals. 286 // It includes an optional minus sign, must have at least one 287 // digit before and after a decimal point, may not have prefixed zeros (unless 288 // the integer part is zero), and may include an exponent part (e.g., "e-10"). 289 // Hexadecimal and octal numbers are not allowed. 290 Handle<Object> ParseJsonNumber(); 291 292 // Parse a single JSON value from input (grammar production JSONValue). 293 // A JSON value is either a (double-quoted) string literal, a number literal, 294 // one of "true", "false", or "null", or an object or array literal. 295 MaybeHandle<Object> ParseJsonValue(); 296 297 Handle<Object> BuildJsonObject( 298 const JsonContinuation& cont, 299 const SmallVector<JsonProperty>& property_stack, Handle<Map> feedback); 300 Handle<Object> BuildJsonArray( 301 const JsonContinuation& cont, 302 const SmallVector<Handle<Object>>& element_stack); 303 304 // Mark that a parsing error has happened at the current character. 305 void ReportUnexpectedCharacter(base::uc32 c); 306 // Mark that a parsing error has happened at the current token. 307 void ReportUnexpectedToken(JsonToken token); 308 isolate()309 inline Isolate* isolate() { return isolate_; } factory()310 inline Factory* factory() { return isolate_->factory(); } object_constructor()311 inline Handle<JSFunction> object_constructor() { return object_constructor_; } 312 313 static const int kInitialSpecialStringLength = 32; 314 UpdatePointersCallback(void * parser)315 static void UpdatePointersCallback(void* parser) { 316 reinterpret_cast<JsonParser<Char>*>(parser)->UpdatePointers(); 317 } 318 UpdatePointers()319 void UpdatePointers() { 320 DisallowGarbageCollection no_gc; 321 const Char* chars = Handle<SeqString>::cast(source_)->GetChars(no_gc); 322 if (chars_ != chars) { 323 size_t position = cursor_ - chars_; 324 size_t length = end_ - chars_; 325 chars_ = chars; 326 cursor_ = chars_ + position; 327 end_ = chars_ + length; 328 } 329 } 330 331 private: 332 static const bool kIsOneByte = sizeof(Char) == 1; 333 is_at_end()334 bool is_at_end() const { 335 DCHECK_LE(cursor_, end_); 336 return cursor_ == end_; 337 } 338 position()339 int position() const { return static_cast<int>(cursor_ - chars_); } 340 341 Isolate* isolate_; 342 const uint64_t hash_seed_; 343 JsonToken next_; 344 // Indicates whether the bytes underneath source_ can relocate during GC. 345 bool chars_may_relocate_; 346 Handle<JSFunction> object_constructor_; 347 const Handle<String> original_source_; 348 Handle<String> source_; 349 350 // Cached pointer to the raw chars in source. In case source is on-heap, we 351 // register an UpdatePointers callback. For this reason, chars_, cursor_ and 352 // end_ should never be locally cached across a possible allocation. The scope 353 // in which we cache chars has to be guarded by a DisallowGarbageCollection 354 // scope. 355 const Char* cursor_; 356 const Char* end_; 357 const Char* chars_; 358 }; 359 360 // Explicit instantiation declarations. 361 extern template class JsonParser<uint8_t>; 362 extern template class JsonParser<uint16_t>; 363 364 } // namespace internal 365 } // namespace v8 366 367 #endif // V8_JSON_JSON_PARSER_H_ 368