// Copyright 2014 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 28 #ifndef V8_AST_AST_VALUE_FACTORY_H_ 29 #define V8_AST_AST_VALUE_FACTORY_H_ 30 31 #include "src/base/hashmap.h" 32 #include "src/conversions.h" 33 #include "src/factory.h" 34 #include "src/globals.h" 35 #include "src/isolate.h" 36 #include "src/utils.h" 37 38 // AstString, AstValue and AstValueFactory are for storing strings and values 39 // independent of the V8 heap and internalizing them later. During parsing, 40 // AstStrings and AstValues are created and stored outside the heap, in 41 // AstValueFactory. After parsing, the strings and values are internalized 42 // (moved into the V8 heap). 43 namespace v8 { 44 namespace internal { 45 46 class AstString : public ZoneObject { 47 public: AstString(bool is_raw)48 explicit AstString(bool is_raw) 49 : next_(nullptr), bit_field_(IsRawStringBits::encode(is_raw)) {} 50 51 int length() const; IsEmpty()52 bool IsEmpty() const { return length() == 0; } 53 54 // Puts the string into the V8 heap. 55 void Internalize(Isolate* isolate); 56 57 // This function can be called after internalizing. string()58 V8_INLINE Handle<String> string() const { 59 DCHECK_NOT_NULL(string_); 60 return Handle<String>(string_); 61 } 62 next()63 AstString* next() { return next_; } next_location()64 AstString** next_location() { return &next_; } 65 66 protected: set_string(Handle<String> string)67 void set_string(Handle<String> string) { string_ = string.location(); } 68 // {string_} is stored as String** instead of a Handle<String> so it can be 69 // stored in a union with {next_}. 70 union { 71 AstString* next_; 72 String** string_; 73 }; 74 // Poor-man's virtual dispatch to AstRawString / AstConsString. Takes less 75 // memory. 
76 class IsRawStringBits : public BitField<bool, 0, 1> {}; 77 int bit_field_; 78 }; 79 80 81 class AstRawString final : public AstString { 82 public: length()83 int length() const { 84 if (is_one_byte()) return literal_bytes_.length(); 85 return literal_bytes_.length() / 2; 86 } 87 byte_length()88 int byte_length() const { return literal_bytes_.length(); } 89 90 void Internalize(Isolate* isolate); 91 92 bool AsArrayIndex(uint32_t* index) const; 93 94 // The string is not null-terminated, use length() to find out the length. raw_data()95 const unsigned char* raw_data() const { 96 return literal_bytes_.start(); 97 } 98 is_one_byte()99 bool is_one_byte() const { return IsOneByteBits::decode(bit_field_); } 100 101 bool IsOneByteEqualTo(const char* data) const; FirstCharacter()102 uint16_t FirstCharacter() const { 103 if (is_one_byte()) return literal_bytes_[0]; 104 const uint16_t* c = 105 reinterpret_cast<const uint16_t*>(literal_bytes_.start()); 106 return *c; 107 } 108 109 static bool Compare(void* a, void* b); 110 111 // For storing AstRawStrings in a hash map. hash()112 uint32_t hash() const { 113 return hash_; 114 } 115 116 private: 117 friend class AstRawStringInternalizationKey; 118 friend class AstStringConstants; 119 friend class AstValueFactory; 120 AstRawString(bool is_one_byte,const Vector<const byte> & literal_bytes,uint32_t hash)121 AstRawString(bool is_one_byte, const Vector<const byte>& literal_bytes, 122 uint32_t hash) 123 : AstString(true), hash_(hash), literal_bytes_(literal_bytes) { 124 bit_field_ |= IsOneByteBits::encode(is_one_byte); 125 } 126 AstRawString()127 AstRawString() : AstString(true), hash_(0) { 128 bit_field_ |= IsOneByteBits::encode(true); 129 } 130 131 class IsOneByteBits : public BitField<bool, IsRawStringBits::kNext, 1> {}; 132 133 uint32_t hash_; 134 // Points to memory owned by Zone. 
135 Vector<const byte> literal_bytes_; 136 }; 137 138 139 class AstConsString final : public AstString { 140 public: AstConsString(const AstString * left,const AstString * right)141 AstConsString(const AstString* left, const AstString* right) 142 : AstString(false), 143 length_(left->length() + right->length()), 144 left_(left), 145 right_(right) {} 146 length()147 int length() const { return length_; } 148 149 void Internalize(Isolate* isolate); 150 151 private: 152 const int length_; 153 const AstString* left_; 154 const AstString* right_; 155 }; 156 157 enum class AstSymbol : uint8_t { kHomeObjectSymbol }; 158 159 // AstValue is either a string, a symbol, a number, a string array, a boolean, 160 // or a special value (null, undefined, the hole). 161 class AstValue : public ZoneObject { 162 public: IsString()163 bool IsString() const { 164 return type_ == STRING; 165 } 166 IsSymbol()167 bool IsSymbol() const { return type_ == SYMBOL; } 168 IsNumber()169 bool IsNumber() const { return IsSmi() || IsHeapNumber(); } 170 ContainsDot()171 bool ContainsDot() const { 172 return type_ == NUMBER_WITH_DOT || type_ == SMI_WITH_DOT; 173 } 174 AsString()175 const AstRawString* AsString() const { 176 CHECK_EQ(STRING, type_); 177 return string_; 178 } 179 AsSymbol()180 AstSymbol AsSymbol() const { 181 CHECK_EQ(SYMBOL, type_); 182 return symbol_; 183 } 184 AsNumber()185 double AsNumber() const { 186 if (IsHeapNumber()) return number_; 187 if (IsSmi()) return smi_; 188 UNREACHABLE(); 189 return 0; 190 } 191 AsSmi()192 Smi* AsSmi() const { 193 CHECK(IsSmi()); 194 return Smi::FromInt(smi_); 195 } 196 ToUint32(uint32_t * value)197 bool ToUint32(uint32_t* value) const { 198 if (IsSmi()) { 199 int num = smi_; 200 if (num < 0) return false; 201 *value = static_cast<uint32_t>(num); 202 return true; 203 } 204 if (IsHeapNumber()) { 205 return DoubleToUint32IfEqualToSelf(number_, value); 206 } 207 return false; 208 } 209 EqualsString(const AstRawString * string)210 bool EqualsString(const 
AstRawString* string) const { 211 return type_ == STRING && string_ == string; 212 } 213 214 bool IsPropertyName() const; 215 216 bool BooleanValue() const; 217 IsSmi()218 bool IsSmi() const { return type_ == SMI || type_ == SMI_WITH_DOT; } IsHeapNumber()219 bool IsHeapNumber() const { 220 return type_ == NUMBER || type_ == NUMBER_WITH_DOT; 221 } IsFalse()222 bool IsFalse() const { return type_ == BOOLEAN && !bool_; } IsTrue()223 bool IsTrue() const { return type_ == BOOLEAN && bool_; } IsUndefined()224 bool IsUndefined() const { return type_ == UNDEFINED; } IsTheHole()225 bool IsTheHole() const { return type_ == THE_HOLE; } IsNull()226 bool IsNull() const { return type_ == NULL_TYPE; } 227 228 void Internalize(Isolate* isolate); 229 230 // Can be called after Internalize has been called. value()231 V8_INLINE Handle<Object> value() const { 232 if (type_ == STRING) { 233 return string_->string(); 234 } 235 DCHECK_NOT_NULL(value_); 236 return Handle<Object>(value_); 237 } next()238 AstValue* next() const { return next_; } set_next(AstValue * next)239 void set_next(AstValue* next) { next_ = next; } 240 241 private: set_value(Handle<Object> object)242 void set_value(Handle<Object> object) { value_ = object.location(); } 243 friend class AstValueFactory; 244 245 enum Type { 246 STRING, 247 SYMBOL, 248 NUMBER, 249 NUMBER_WITH_DOT, 250 SMI, 251 SMI_WITH_DOT, 252 BOOLEAN, 253 NULL_TYPE, 254 UNDEFINED, 255 THE_HOLE 256 }; 257 AstValue(const AstRawString * s)258 explicit AstValue(const AstRawString* s) : type_(STRING), next_(nullptr) { 259 string_ = s; 260 } 261 AstValue(AstSymbol symbol)262 explicit AstValue(AstSymbol symbol) : type_(SYMBOL), next_(nullptr) { 263 symbol_ = symbol; 264 } 265 AstValue(double n,bool with_dot)266 explicit AstValue(double n, bool with_dot) : next_(nullptr) { 267 int int_value; 268 if (DoubleToSmiInteger(n, &int_value)) { 269 type_ = with_dot ? SMI_WITH_DOT : SMI; 270 smi_ = int_value; 271 } else { 272 type_ = with_dot ? 
NUMBER_WITH_DOT : NUMBER; 273 number_ = n; 274 } 275 } 276 AstValue(Type t,int i)277 AstValue(Type t, int i) : type_(t), next_(nullptr) { 278 DCHECK(type_ == SMI); 279 smi_ = i; 280 } 281 AstValue(bool b)282 explicit AstValue(bool b) : type_(BOOLEAN), next_(nullptr) { bool_ = b; } 283 AstValue(Type t)284 explicit AstValue(Type t) : type_(t), next_(nullptr) { 285 DCHECK(t == NULL_TYPE || t == UNDEFINED || t == THE_HOLE); 286 } 287 288 Type type_; 289 290 // {value_} is stored as Object** instead of a Handle<Object> so it can be 291 // stored in a union with {next_}. 292 union { 293 Object** value_; // if internalized 294 AstValue* next_; // if !internalized 295 }; 296 297 // Uninternalized value. 298 union { 299 const AstRawString* string_; 300 double number_; 301 int smi_; 302 bool bool_; 303 AstSymbol symbol_; 304 }; 305 }; 306 307 // For generating constants. 308 #define STRING_CONSTANTS(F) \ 309 F(anonymous_function, "(anonymous function)") \ 310 F(arguments, "arguments") \ 311 F(async, "async") \ 312 F(await, "await") \ 313 F(constructor, "constructor") \ 314 F(default, "default") \ 315 F(done, "done") \ 316 F(dot, ".") \ 317 F(dot_for, ".for") \ 318 F(dot_generator_object, ".generator_object") \ 319 F(dot_iterator, ".iterator") \ 320 F(dot_result, ".result") \ 321 F(dot_switch_tag, ".switch_tag") \ 322 F(dot_catch, ".catch") \ 323 F(empty, "") \ 324 F(eval, "eval") \ 325 F(function, "function") \ 326 F(get_space, "get ") \ 327 F(length, "length") \ 328 F(let, "let") \ 329 F(name, "name") \ 330 F(native, "native") \ 331 F(new_target, ".new.target") \ 332 F(next, "next") \ 333 F(proto, "__proto__") \ 334 F(prototype, "prototype") \ 335 F(return, "return") \ 336 F(set_space, "set ") \ 337 F(star_default_star, "*default*") \ 338 F(this, "this") \ 339 F(this_function, ".this_function") \ 340 F(throw, "throw") \ 341 F(undefined, "undefined") \ 342 F(use_asm, "use asm") \ 343 F(use_strict, "use strict") \ 344 F(value, "value") 345 346 class AstStringConstants final { 
347 public: AstStringConstants(Isolate * isolate,uint32_t hash_seed)348 AstStringConstants(Isolate* isolate, uint32_t hash_seed) 349 : zone_(isolate->allocator(), ZONE_NAME), 350 string_table_(AstRawString::Compare), 351 hash_seed_(hash_seed) { 352 DCHECK(ThreadId::Current().Equals(isolate->thread_id())); 353 #define F(name, str) \ 354 { \ 355 const char* data = str; \ 356 Vector<const uint8_t> literal(reinterpret_cast<const uint8_t*>(data), \ 357 static_cast<int>(strlen(data))); \ 358 uint32_t hash = StringHasher::HashSequentialString<uint8_t>( \ 359 literal.start(), literal.length(), hash_seed_); \ 360 name##_string_ = new (&zone_) AstRawString(true, literal, hash); \ 361 /* The Handle returned by the factory is located on the roots */ \ 362 /* array, not on the temporary HandleScope, so this is safe. */ \ 363 name##_string_->set_string(isolate->factory()->name##_string()); \ 364 base::HashMap::Entry* entry = \ 365 string_table_.InsertNew(name##_string_, name##_string_->hash()); \ 366 DCHECK(entry->value == nullptr); \ 367 entry->value = reinterpret_cast<void*>(1); \ 368 } 369 STRING_CONSTANTS(F) 370 #undef F 371 } 372 373 #define F(name, str) \ 374 const AstRawString* name##_string() const { return name##_string_; } STRING_CONSTANTS(F)375 STRING_CONSTANTS(F) 376 #undef F 377 378 uint32_t hash_seed() const { return hash_seed_; } string_table()379 const base::CustomMatcherHashMap* string_table() const { 380 return &string_table_; 381 } 382 383 private: 384 Zone zone_; 385 base::CustomMatcherHashMap string_table_; 386 uint32_t hash_seed_; 387 388 #define F(name, str) AstRawString* name##_string_; 389 STRING_CONSTANTS(F) 390 #undef F 391 392 DISALLOW_COPY_AND_ASSIGN(AstStringConstants); 393 }; 394 395 #define OTHER_CONSTANTS(F) \ 396 F(true_value) \ 397 F(false_value) \ 398 F(null_value) \ 399 F(undefined_value) \ 400 F(the_hole_value) 401 402 class AstValueFactory { 403 public: AstValueFactory(Zone * zone,const AstStringConstants * string_constants,uint32_t 
hash_seed)404 AstValueFactory(Zone* zone, const AstStringConstants* string_constants, 405 uint32_t hash_seed) 406 : string_table_(string_constants->string_table()), 407 values_(nullptr), 408 strings_(nullptr), 409 strings_end_(&strings_), 410 string_constants_(string_constants), 411 zone_(zone), 412 hash_seed_(hash_seed) { 413 #define F(name) name##_ = nullptr; 414 OTHER_CONSTANTS(F) 415 #undef F 416 DCHECK_EQ(hash_seed, string_constants->hash_seed()); 417 std::fill(smis_, smis_ + arraysize(smis_), nullptr); 418 std::fill(one_character_strings_, 419 one_character_strings_ + arraysize(one_character_strings_), 420 nullptr); 421 } 422 zone()423 Zone* zone() const { return zone_; } 424 GetOneByteString(Vector<const uint8_t> literal)425 const AstRawString* GetOneByteString(Vector<const uint8_t> literal) { 426 return GetOneByteStringInternal(literal); 427 } GetOneByteString(const char * string)428 const AstRawString* GetOneByteString(const char* string) { 429 return GetOneByteString(Vector<const uint8_t>( 430 reinterpret_cast<const uint8_t*>(string), StrLength(string))); 431 } GetTwoByteString(Vector<const uint16_t> literal)432 const AstRawString* GetTwoByteString(Vector<const uint16_t> literal) { 433 return GetTwoByteStringInternal(literal); 434 } 435 const AstRawString* GetString(Handle<String> literal); 436 const AstConsString* NewConsString(const AstString* left, 437 const AstString* right); 438 439 V8_EXPORT_PRIVATE void Internalize(Isolate* isolate); 440 441 #define F(name, str) \ 442 const AstRawString* name##_string() { \ 443 return string_constants_->name##_string(); \ 444 } 445 STRING_CONSTANTS(F) 446 #undef F 447 448 V8_EXPORT_PRIVATE const AstValue* NewString(const AstRawString* string); 449 // A JavaScript symbol (ECMA-262 edition 6). 
450 const AstValue* NewSymbol(AstSymbol symbol); 451 V8_EXPORT_PRIVATE const AstValue* NewNumber(double number, 452 bool with_dot = false); 453 const AstValue* NewSmi(uint32_t number); 454 const AstValue* NewBoolean(bool b); 455 const AstValue* NewStringList(ZoneList<const AstRawString*>* strings); 456 const AstValue* NewNull(); 457 const AstValue* NewUndefined(); 458 const AstValue* NewTheHole(); 459 460 private: 461 static const uint32_t kMaxCachedSmi = 1 << 10; 462 463 STATIC_ASSERT(kMaxCachedSmi <= Smi::kMaxValue); 464 AddValue(AstValue * value)465 AstValue* AddValue(AstValue* value) { 466 value->set_next(values_); 467 values_ = value; 468 return value; 469 } AddString(AstString * string)470 AstString* AddString(AstString* string) { 471 *strings_end_ = string; 472 strings_end_ = string->next_location(); 473 return string; 474 } ResetStrings()475 void ResetStrings() { 476 strings_ = nullptr; 477 strings_end_ = &strings_; 478 } 479 V8_EXPORT_PRIVATE AstRawString* GetOneByteStringInternal( 480 Vector<const uint8_t> literal); 481 AstRawString* GetTwoByteStringInternal(Vector<const uint16_t> literal); 482 AstRawString* GetString(uint32_t hash, bool is_one_byte, 483 Vector<const byte> literal_bytes); 484 485 // All strings are copied here, one after another (no NULLs inbetween). 486 base::CustomMatcherHashMap string_table_; 487 // For keeping track of all AstValues and AstRawStrings we've created (so that 488 // they can be internalized later). 489 AstValue* values_; 490 491 // We need to keep track of strings_ in order since cons strings require their 492 // members to be internalized first. 493 AstString* strings_; 494 AstString** strings_end_; 495 496 // Holds constant string values which are shared across the isolate. 497 const AstStringConstants* string_constants_; 498 499 // Caches for faster access: small numbers, one character lowercase strings 500 // (for minified code). 
501 AstValue* smis_[kMaxCachedSmi + 1]; 502 AstRawString* one_character_strings_[26]; 503 504 Zone* zone_; 505 506 uint32_t hash_seed_; 507 508 #define F(name) AstValue* name##_; 509 OTHER_CONSTANTS(F) 510 #undef F 511 }; 512 } // namespace internal 513 } // namespace v8 514 515 #undef STRING_CONSTANTS 516 #undef OTHER_CONSTANTS 517 518 #endif // V8_AST_AST_VALUE_FACTORY_H_ 519