1 // Copyright 2011 the V8 project authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef V8_JSON_JSON_PARSER_H_ 6 #define V8_JSON_JSON_PARSER_H_ 7 8 #include "src/execution/isolate.h" 9 #include "src/heap/factory.h" 10 #include "src/objects/objects.h" 11 #include "src/zone/zone-containers.h" 12 13 namespace v8 { 14 namespace internal { 15 16 enum ParseElementResult { kElementFound, kElementNotFound }; 17 18 class JsonString final { 19 public: JsonString()20 JsonString() 21 : start_(0), 22 length_(0), 23 needs_conversion_(false), 24 internalize_(false), 25 has_escape_(false), 26 is_index_(false) {} 27 JsonString(uint32_t index)28 explicit JsonString(uint32_t index) 29 : index_(index), 30 length_(0), 31 needs_conversion_(false), 32 internalize_(false), 33 has_escape_(false), 34 is_index_(true) {} 35 JsonString(int start,int length,bool needs_conversion,bool needs_internalization,bool has_escape)36 JsonString(int start, int length, bool needs_conversion, 37 bool needs_internalization, bool has_escape) 38 : start_(start), 39 length_(length), 40 needs_conversion_(needs_conversion), 41 internalize_(needs_internalization || 42 length_ <= kMaxInternalizedStringValueLength), 43 has_escape_(has_escape), 44 is_index_(false) {} 45 internalize()46 bool internalize() const { 47 DCHECK(!is_index_); 48 return internalize_; 49 } 50 needs_conversion()51 bool needs_conversion() const { 52 DCHECK(!is_index_); 53 return needs_conversion_; 54 } 55 has_escape()56 bool has_escape() const { 57 DCHECK(!is_index_); 58 return has_escape_; 59 } 60 start()61 int start() const { 62 DCHECK(!is_index_); 63 return start_; 64 } 65 length()66 int length() const { 67 DCHECK(!is_index_); 68 return length_; 69 } 70 index()71 uint32_t index() const { 72 DCHECK(is_index_); 73 return index_; 74 } 75 is_index()76 bool is_index() const { return is_index_; } 77 78 private: 79 static const int kMaxInternalizedStringValueLength = 10; 80 81 union { 82 const int start_; 83 const uint32_t index_; 84 }; 85 const int length_; 86 const bool needs_conversion_ : 1; 87 const bool internalize_ : 1; 88 const bool has_escape_ : 1; 89 const bool is_index_ : 1; 90 }; 91 92 struct JsonProperty { JsonPropertyJsonProperty93 JsonProperty() { UNREACHABLE(); } JsonPropertyJsonProperty94 explicit JsonProperty(const JsonString& string) : string(string) {} 95 96 JsonString string; 97 Handle<Object> value; 98 }; 99 100 class JsonParseInternalizer { 101 public: 102 static MaybeHandle<Object> Internalize(Isolate* isolate, 103 Handle<Object> object, 104 Handle<Object> reviver); 105 106 private: JsonParseInternalizer(Isolate * isolate,Handle<JSReceiver> reviver)107 JsonParseInternalizer(Isolate* isolate, Handle<JSReceiver> reviver) 108 : isolate_(isolate), reviver_(reviver) {} 109 110 MaybeHandle<Object> InternalizeJsonProperty(Handle<JSReceiver> holder, 111 Handle<String> key); 112 113 bool RecurseAndApply(Handle<JSReceiver> holder, Handle<String> name); 114 115 Isolate* isolate_; 116 Handle<JSReceiver> reviver_; 117 }; 118 119 enum class JsonToken : uint8_t { 120 NUMBER, 121 STRING, 122 LBRACE, 123 RBRACE, 124 LBRACK, 125 RBRACK, 126 TRUE_LITERAL, 127 FALSE_LITERAL, 128 NULL_LITERAL, 129 WHITESPACE, 130 COLON, 131 COMMA, 132 ILLEGAL, 133 EOS 134 }; 135 136 // A simple json parser. 137 template <typename Char> 138 class JsonParser final { 139 public: 140 using SeqString = typename CharTraits<Char>::String; 141 using SeqExternalString = typename CharTraits<Char>::ExternalString; 142 Parse(Isolate * isolate,Handle<String> source,Handle<Object> reviver)143 V8_WARN_UNUSED_RESULT static MaybeHandle<Object> Parse( 144 Isolate* isolate, Handle<String> source, Handle<Object> reviver) { 145 Handle<Object> result; 146 ASSIGN_RETURN_ON_EXCEPTION(isolate, result, 147 JsonParser(isolate, source).ParseJson(), Object); 148 if (reviver->IsCallable()) { 149 return JsonParseInternalizer::Internalize(isolate, result, reviver); 150 } 151 return result; 152 } 153 154 static constexpr uc32 kEndOfString = static_cast<uc32>(-1); 155 static constexpr uc32 kInvalidUnicodeCharacter = static_cast<uc32>(-1); 156 157 private: 158 struct JsonContinuation { 159 enum Type : uint8_t { kReturn, kObjectProperty, kArrayElement }; JsonContinuationJsonContinuation160 JsonContinuation(Isolate* isolate, Type type, size_t index) 161 : scope(isolate), 162 type_(type), 163 index(static_cast<uint32_t>(index)), 164 max_index(0), 165 elements(0) {} 166 typeJsonContinuation167 Type type() const { return static_cast<Type>(type_); } set_typeJsonContinuation168 void set_type(Type type) { type_ = static_cast<uint8_t>(type); } 169 170 HandleScope scope; 171 // Unfortunately GCC doesn't like packing Type in two bits. 172 uint32_t type_ : 2; 173 uint32_t index : 30; 174 uint32_t max_index; 175 uint32_t elements; 176 }; 177 178 JsonParser(Isolate* isolate, Handle<String> source); 179 ~JsonParser(); 180 181 // Parse a string containing a single JSON value. 182 MaybeHandle<Object> ParseJson(); 183 advance()184 void advance() { ++cursor_; } 185 CurrentCharacter()186 uc32 CurrentCharacter() { 187 if (V8_UNLIKELY(is_at_end())) return kEndOfString; 188 return *cursor_; 189 } 190 NextCharacter()191 uc32 NextCharacter() { 192 advance(); 193 return CurrentCharacter(); 194 } 195 196 void AdvanceToNonDecimal(); 197 peek()198 V8_INLINE JsonToken peek() const { return next_; } 199 Consume(JsonToken token)200 void Consume(JsonToken token) { 201 DCHECK_EQ(peek(), token); 202 advance(); 203 } 204 Expect(JsonToken token)205 void Expect(JsonToken token) { 206 if (V8_LIKELY(peek() == token)) { 207 advance(); 208 } else { 209 ReportUnexpectedToken(peek()); 210 } 211 } 212 ExpectNext(JsonToken token)213 void ExpectNext(JsonToken token) { 214 SkipWhitespace(); 215 Expect(token); 216 } 217 Check(JsonToken token)218 bool Check(JsonToken token) { 219 SkipWhitespace(); 220 if (next_ != token) return false; 221 advance(); 222 return true; 223 } 224 225 template <size_t N> ScanLiteral(const char (& s)[N])226 void ScanLiteral(const char (&s)[N]) { 227 DCHECK(!is_at_end()); 228 // There's at least 1 character, we always consume a character and compare 229 // the next character. The first character was compared before we jumped 230 // to ScanLiteral. 231 STATIC_ASSERT(N > 2); 232 size_t remaining = static_cast<size_t>(end_ - cursor_); 233 if (V8_LIKELY(remaining >= N - 1 && 234 CompareChars(s + 1, cursor_ + 1, N - 2) == 0)) { 235 cursor_ += N - 1; 236 return; 237 } 238 239 cursor_++; 240 for (size_t i = 0; i < Min(N - 2, remaining - 1); i++) { 241 if (*(s + 1 + i) != *cursor_) { 242 ReportUnexpectedCharacter(*cursor_); 243 return; 244 } 245 cursor_++; 246 } 247 248 DCHECK(is_at_end()); 249 ReportUnexpectedToken(JsonToken::EOS); 250 } 251 252 // The JSON lexical grammar is specified in the ECMAScript 5 standard, 253 // section 15.12.1.1. The only allowed whitespace characters between tokens 254 // are tab, carriage-return, newline and space. 255 void SkipWhitespace(); 256 257 // A JSON string (production JSONString) is subset of valid JavaScript string 258 // literals. The string must only be double-quoted (not single-quoted), and 259 // the only allowed backslash-escapes are ", /, \, b, f, n, r, t and 260 // four-digit hex escapes (uXXXX). Any other use of backslashes is invalid. 261 JsonString ScanJsonString(bool needs_internalization); 262 JsonString ScanJsonPropertyKey(JsonContinuation* cont); 263 uc32 ScanUnicodeCharacter(); 264 Handle<String> MakeString(const JsonString& string, 265 Handle<String> hint = Handle<String>()); 266 267 template <typename SinkChar> 268 void DecodeString(SinkChar* sink, int start, int length); 269 270 template <typename SinkSeqString> 271 Handle<String> DecodeString(const JsonString& string, 272 Handle<SinkSeqString> intermediate, 273 Handle<String> hint); 274 275 // A JSON number (production JSONNumber) is a subset of the valid JavaScript 276 // decimal number literals. 277 // It includes an optional minus sign, must have at least one 278 // digit before and after a decimal point, may not have prefixed zeros (unless 279 // the integer part is zero), and may include an exponent part (e.g., "e-10"). 280 // Hexadecimal and octal numbers are not allowed. 281 Handle<Object> ParseJsonNumber(); 282 283 // Parse a single JSON value from input (grammar production JSONValue). 284 // A JSON value is either a (double-quoted) string literal, a number literal, 285 // one of "true", "false", or "null", or an object or array literal. 286 MaybeHandle<Object> ParseJsonValue(); 287 288 Handle<Object> BuildJsonObject( 289 const JsonContinuation& cont, 290 const std::vector<JsonProperty>& property_stack, Handle<Map> feedback); 291 Handle<Object> BuildJsonArray( 292 const JsonContinuation& cont, 293 const std::vector<Handle<Object>>& element_stack); 294 295 // Mark that a parsing error has happened at the current character. 296 void ReportUnexpectedCharacter(uc32 c); 297 // Mark that a parsing error has happened at the current token. 298 void ReportUnexpectedToken(JsonToken token); 299 isolate()300 inline Isolate* isolate() { return isolate_; } factory()301 inline Factory* factory() { return isolate_->factory(); } object_constructor()302 inline Handle<JSFunction> object_constructor() { return object_constructor_; } 303 304 static const int kInitialSpecialStringLength = 32; 305 UpdatePointersCallback(v8::Isolate * v8_isolate,v8::GCType type,v8::GCCallbackFlags flags,void * parser)306 static void UpdatePointersCallback(v8::Isolate* v8_isolate, v8::GCType type, 307 v8::GCCallbackFlags flags, void* parser) { 308 reinterpret_cast<JsonParser<Char>*>(parser)->UpdatePointers(); 309 } 310 UpdatePointers()311 void UpdatePointers() { 312 DisallowHeapAllocation no_gc; 313 const Char* chars = Handle<SeqString>::cast(source_)->GetChars(no_gc); 314 if (chars_ != chars) { 315 size_t position = cursor_ - chars_; 316 size_t length = end_ - chars_; 317 chars_ = chars; 318 cursor_ = chars_ + position; 319 end_ = chars_ + length; 320 } 321 } 322 323 private: 324 static const bool kIsOneByte = sizeof(Char) == 1; 325 is_at_end()326 bool is_at_end() const { 327 DCHECK_LE(cursor_, end_); 328 return cursor_ == end_; 329 } 330 position()331 int position() const { return static_cast<int>(cursor_ - chars_); } 332 333 Isolate* isolate_; 334 const uint64_t hash_seed_; 335 JsonToken next_; 336 // Indicates whether the bytes underneath source_ can relocate during GC. 337 bool chars_may_relocate_; 338 Handle<JSFunction> object_constructor_; 339 const Handle<String> original_source_; 340 Handle<String> source_; 341 342 // Cached pointer to the raw chars in source. In case source is on-heap, we 343 // register an UpdatePointers callback. For this reason, chars_, cursor_ and 344 // end_ should never be locally cached across a possible allocation. The scope 345 // in which we cache chars has to be guarded by a DisallowHeapAllocation 346 // scope. 347 const Char* cursor_; 348 const Char* end_; 349 const Char* chars_; 350 }; 351 352 // Explicit instantiation declarations. 353 extern template class JsonParser<uint8_t>; 354 extern template class JsonParser<uint16_t>; 355 356 } // namespace internal 357 } // namespace v8 358 359 #endif // V8_JSON_JSON_PARSER_H_ 360