// Copyright 2012 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5 #ifndef V8_SERIALIZE_H_
6 #define V8_SERIALIZE_H_
7
8 #include "src/hashmap.h"
9
10 namespace v8 {
11 namespace internal {
12
// A TypeCode is used to distinguish different kinds of external reference.
// The codes are small consecutive integers starting at UNCLASSIFIED (0); they
// are not bit flags. LAZY_DEOPTIMIZATION must remain the last enumerator
// because the number of type codes is derived from it.
enum TypeCode {
  UNCLASSIFIED,  // One-of-a-kind references.
  BUILTIN,
  RUNTIME_FUNCTION,
  IC_UTILITY,
  STATS_COUNTER,
  TOP_ADDRESS,
  C_BUILTIN,
  EXTENSION,
  ACCESSOR,
  RUNTIME_ENTRY,
  STUB_CACHE_TABLE,
  LAZY_DEOPTIMIZATION
};
29
30 const int kTypeCodeCount = LAZY_DEOPTIMIZATION + 1;
31 const int kFirstTypeCode = UNCLASSIFIED;
32
33 const int kReferenceIdBits = 16;
34 const int kReferenceIdMask = (1 << kReferenceIdBits) - 1;
35 const int kReferenceTypeShift = kReferenceIdBits;
36
37 const int kDeoptTableSerializeEntryCount = 64;
38
39 // ExternalReferenceTable is a helper class that defines the relationship
40 // between external references and their encodings. It is used to build
41 // hashmaps in ExternalReferenceEncoder and ExternalReferenceDecoder.
42 class ExternalReferenceTable {
43 public:
44 static ExternalReferenceTable* instance(Isolate* isolate);
45
~ExternalReferenceTable()46 ~ExternalReferenceTable() { }
47
size()48 int size() const { return refs_.length(); }
49
address(int i)50 Address address(int i) { return refs_[i].address; }
51
code(int i)52 uint32_t code(int i) { return refs_[i].code; }
53
name(int i)54 const char* name(int i) { return refs_[i].name; }
55
max_id(int code)56 int max_id(int code) { return max_id_[code]; }
57
58 private:
ExternalReferenceTable(Isolate * isolate)59 explicit ExternalReferenceTable(Isolate* isolate) : refs_(64) {
60 PopulateTable(isolate);
61 }
62
63 struct ExternalReferenceEntry {
64 Address address;
65 uint32_t code;
66 const char* name;
67 };
68
69 void PopulateTable(Isolate* isolate);
70
71 // For a few types of references, we can get their address from their id.
72 void AddFromId(TypeCode type,
73 uint16_t id,
74 const char* name,
75 Isolate* isolate);
76
77 // For other types of references, the caller will figure out the address.
78 void Add(Address address, TypeCode type, uint16_t id, const char* name);
79
80 List<ExternalReferenceEntry> refs_;
81 int max_id_[kTypeCodeCount];
82 };
83
84
85 class ExternalReferenceEncoder {
86 public:
87 explicit ExternalReferenceEncoder(Isolate* isolate);
88
89 uint32_t Encode(Address key) const;
90
91 const char* NameOfAddress(Address key) const;
92
93 private:
94 HashMap encodings_;
Hash(Address key)95 static uint32_t Hash(Address key) {
96 return static_cast<uint32_t>(reinterpret_cast<uintptr_t>(key) >> 2);
97 }
98
99 int IndexOf(Address key) const;
100
101 void Put(Address key, int index);
102
103 Isolate* isolate_;
104 };
105
106
107 class ExternalReferenceDecoder {
108 public:
109 explicit ExternalReferenceDecoder(Isolate* isolate);
110 ~ExternalReferenceDecoder();
111
Decode(uint32_t key)112 Address Decode(uint32_t key) const {
113 if (key == 0) return NULL;
114 return *Lookup(key);
115 }
116
117 private:
118 Address** encodings_;
119
Lookup(uint32_t key)120 Address* Lookup(uint32_t key) const {
121 int type = key >> kReferenceTypeShift;
122 ASSERT(kFirstTypeCode <= type && type < kTypeCodeCount);
123 int id = key & kReferenceIdMask;
124 return &encodings_[type][id];
125 }
126
Put(uint32_t key,Address value)127 void Put(uint32_t key, Address value) {
128 *Lookup(key) = value;
129 }
130
131 Isolate* isolate_;
132 };
133
134
135 class SnapshotByteSource {
136 public:
SnapshotByteSource(const byte * array,int length)137 SnapshotByteSource(const byte* array, int length)
138 : data_(array), length_(length), position_(0) { }
139
HasMore()140 bool HasMore() { return position_ < length_; }
141
Get()142 int Get() {
143 ASSERT(position_ < length_);
144 return data_[position_++];
145 }
146
GetUnalignedInt()147 int32_t GetUnalignedInt() {
148 #if defined(V8_HOST_CAN_READ_UNALIGNED) && __BYTE_ORDER == __LITTLE_ENDIAN
149 int32_t answer;
150 ASSERT(position_ + sizeof(answer) <= length_ + 0u);
151 answer = *reinterpret_cast<const int32_t*>(data_ + position_);
152 #else
153 int32_t answer = data_[position_];
154 answer |= data_[position_ + 1] << 8;
155 answer |= data_[position_ + 2] << 16;
156 answer |= data_[position_ + 3] << 24;
157 #endif
158 return answer;
159 }
160
Advance(int by)161 void Advance(int by) { position_ += by; }
162
163 inline void CopyRaw(byte* to, int number_of_bytes);
164
165 inline int GetInt();
166
167 bool AtEOF();
168
position()169 int position() { return position_; }
170
171 private:
172 const byte* data_;
173 int length_;
174 int position_;
175 };
176
177
178 // The Serializer/Deserializer class is a common superclass for Serializer and
179 // Deserializer which is used to store common constants and methods used by
180 // both.
181 class SerializerDeserializer: public ObjectVisitor {
182 public:
183 static void Iterate(Isolate* isolate, ObjectVisitor* visitor);
184
nop()185 static int nop() { return kNop; }
186
187 protected:
188 // Where the pointed-to object can be found:
189 enum Where {
190 kNewObject = 0, // Object is next in snapshot.
191 // 1-6 One per space.
192 kRootArray = 0x9, // Object is found in root array.
193 kPartialSnapshotCache = 0xa, // Object is in the cache.
194 kExternalReference = 0xb, // Pointer to an external reference.
195 kSkip = 0xc, // Skip n bytes.
196 kNop = 0xd, // Does nothing, used to pad.
197 // 0xe-0xf Free.
198 kBackref = 0x10, // Object is described relative to end.
199 // 0x11-0x16 One per space.
200 kBackrefWithSkip = 0x18, // Object is described relative to end.
201 // 0x19-0x1e One per space.
202 // 0x20-0x3f Used by misc. tags below.
203 kPointedToMask = 0x3f
204 };
205
206 // How to code the pointer to the object.
207 enum HowToCode {
208 kPlain = 0, // Straight pointer.
209 // What this means depends on the architecture:
210 kFromCode = 0x40, // A pointer inlined in code.
211 kHowToCodeMask = 0x40
212 };
213
214 // For kRootArrayConstants
215 enum WithSkip {
216 kNoSkipDistance = 0,
217 kHasSkipDistance = 0x40,
218 kWithSkipMask = 0x40
219 };
220
221 // Where to point within the object.
222 enum WhereToPoint {
223 kStartOfObject = 0,
224 kInnerPointer = 0x80, // First insn in code object or payload of cell.
225 kWhereToPointMask = 0x80
226 };
227
228 // Misc.
229 // Raw data to be copied from the snapshot. This byte code does not advance
230 // the current pointer, which is used for code objects, where we write the
231 // entire code in one memcpy, then fix up stuff with kSkip and other byte
232 // codes that overwrite data.
233 static const int kRawData = 0x20;
234 // Some common raw lengths: 0x21-0x3f. These autoadvance the current pointer.
235 // A tag emitted at strategic points in the snapshot to delineate sections.
236 // If the deserializer does not find these at the expected moments then it
237 // is an indication that the snapshot and the VM do not fit together.
238 // Examine the build process for architecture, version or configuration
239 // mismatches.
240 static const int kSynchronize = 0x70;
241 // Used for the source code of the natives, which is in the executable, but
242 // is referred to from external strings in the snapshot.
243 static const int kNativesStringResource = 0x71;
244 static const int kRepeat = 0x72;
245 static const int kConstantRepeat = 0x73;
246 // 0x73-0x7f Repeat last word (subtract 0x72 to get the count).
247 static const int kMaxRepeats = 0x7f - 0x72;
CodeForRepeats(int repeats)248 static int CodeForRepeats(int repeats) {
249 ASSERT(repeats >= 1 && repeats <= kMaxRepeats);
250 return 0x72 + repeats;
251 }
RepeatsForCode(int byte_code)252 static int RepeatsForCode(int byte_code) {
253 ASSERT(byte_code >= kConstantRepeat && byte_code <= 0x7f);
254 return byte_code - 0x72;
255 }
256 static const int kRootArrayConstants = 0xa0;
257 // 0xa0-0xbf Things from the first 32 elements of the root array.
258 static const int kRootArrayNumberOfConstantEncodings = 0x20;
RootArrayConstantFromByteCode(int byte_code)259 static int RootArrayConstantFromByteCode(int byte_code) {
260 return byte_code & 0x1f;
261 }
262
263 static const int kNumberOfSpaces = LO_SPACE;
264 static const int kAnyOldSpace = -1;
265
266 // A bitmask for getting the space out of an instruction.
267 static const int kSpaceMask = 7;
268 };
269
270
GetInt()271 int SnapshotByteSource::GetInt() {
272 // This way of variable-length encoding integers does not suffer from branch
273 // mispredictions.
274 uint32_t answer = GetUnalignedInt();
275 int bytes = answer & 3;
276 Advance(bytes);
277 uint32_t mask = 0xffffffffu;
278 mask >>= 32 - (bytes << 3);
279 answer &= mask;
280 answer >>= 2;
281 return answer;
282 }
283
284
CopyRaw(byte * to,int number_of_bytes)285 void SnapshotByteSource::CopyRaw(byte* to, int number_of_bytes) {
286 MemCopy(to, data_ + position_, number_of_bytes);
287 position_ += number_of_bytes;
288 }
289
290
291 // A Deserializer reads a snapshot and reconstructs the Object graph it defines.
292 class Deserializer: public SerializerDeserializer {
293 public:
294 // Create a deserializer from a snapshot byte source.
295 explicit Deserializer(SnapshotByteSource* source);
296
297 virtual ~Deserializer();
298
299 // Deserialize the snapshot into an empty heap.
300 void Deserialize(Isolate* isolate);
301
302 // Deserialize a single object and the objects reachable from it.
303 void DeserializePartial(Isolate* isolate, Object** root);
304
set_reservation(int space_number,int reservation)305 void set_reservation(int space_number, int reservation) {
306 ASSERT(space_number >= 0);
307 ASSERT(space_number <= LAST_SPACE);
308 reservations_[space_number] = reservation;
309 }
310
311 private:
312 virtual void VisitPointers(Object** start, Object** end);
313
VisitRuntimeEntry(RelocInfo * rinfo)314 virtual void VisitRuntimeEntry(RelocInfo* rinfo) {
315 UNREACHABLE();
316 }
317
318 // Allocation sites are present in the snapshot, and must be linked into
319 // a list at deserialization time.
320 void RelinkAllocationSite(AllocationSite* site);
321
322 // Fills in some heap data in an area from start to end (non-inclusive). The
323 // space id is used for the write barrier. The object_address is the address
324 // of the object we are writing into, or NULL if we are not writing into an
325 // object, i.e. if we are writing a series of tagged values that are not on
326 // the heap.
327 void ReadChunk(
328 Object** start, Object** end, int space, Address object_address);
329 void ReadObject(int space_number, Object** write_back);
330
331 // This routine both allocates a new object, and also keeps
332 // track of where objects have been allocated so that we can
333 // fix back references when deserializing.
Allocate(int space_index,int size)334 Address Allocate(int space_index, int size) {
335 Address address = high_water_[space_index];
336 high_water_[space_index] = address + size;
337 HeapProfiler* profiler = isolate_->heap_profiler();
338 if (profiler->is_tracking_allocations()) {
339 profiler->AllocationEvent(address, size);
340 }
341 return address;
342 }
343
344 // This returns the address of an object that has been described in the
345 // snapshot as being offset bytes back in a particular space.
GetAddressFromEnd(int space)346 HeapObject* GetAddressFromEnd(int space) {
347 int offset = source_->GetInt();
348 offset <<= kObjectAlignmentBits;
349 return HeapObject::FromAddress(high_water_[space] - offset);
350 }
351
352 void FlushICacheForNewCodeObjects();
353
354 // Cached current isolate.
355 Isolate* isolate_;
356
357 SnapshotByteSource* source_;
358 // This is the address of the next object that will be allocated in each
359 // space. It is used to calculate the addresses of back-references.
360 Address high_water_[LAST_SPACE + 1];
361
362 int reservations_[LAST_SPACE + 1];
363 static const intptr_t kUninitializedReservation = -1;
364
365 ExternalReferenceDecoder* external_reference_decoder_;
366
367 DISALLOW_COPY_AND_ASSIGN(Deserializer);
368 };
369
370
// Abstract destination for the bytes produced while serializing. Subclasses
// implement Put() and Position().
class SnapshotByteSink {
 public:
  virtual ~SnapshotByteSink() { }
  // Emits one byte. |description| is a label passed along by the caller.
  virtual void Put(int byte, const char* description) = 0;
  // Emits a byte that starts a section; by default identical to Put().
  virtual void PutSection(int byte, const char* description) {
    Put(byte, description);
  }
  // Emits |integer| in the variable-length encoding. Defined out of line.
  void PutInt(uintptr_t integer, const char* description);
  virtual int Position() = 0;
};
381
382
383 // Mapping objects to their location after deserialization.
384 // This is used during building, but not at runtime by V8.
385 class SerializationAddressMapper {
386 public:
SerializationAddressMapper()387 SerializationAddressMapper()
388 : no_allocation_(),
389 serialization_map_(new HashMap(HashMap::PointersMatch)) { }
390
~SerializationAddressMapper()391 ~SerializationAddressMapper() {
392 delete serialization_map_;
393 }
394
IsMapped(HeapObject * obj)395 bool IsMapped(HeapObject* obj) {
396 return serialization_map_->Lookup(Key(obj), Hash(obj), false) != NULL;
397 }
398
MappedTo(HeapObject * obj)399 int MappedTo(HeapObject* obj) {
400 ASSERT(IsMapped(obj));
401 return static_cast<int>(reinterpret_cast<intptr_t>(
402 serialization_map_->Lookup(Key(obj), Hash(obj), false)->value));
403 }
404
AddMapping(HeapObject * obj,int to)405 void AddMapping(HeapObject* obj, int to) {
406 ASSERT(!IsMapped(obj));
407 HashMap::Entry* entry =
408 serialization_map_->Lookup(Key(obj), Hash(obj), true);
409 entry->value = Value(to);
410 }
411
412 private:
Hash(HeapObject * obj)413 static uint32_t Hash(HeapObject* obj) {
414 return static_cast<int32_t>(reinterpret_cast<intptr_t>(obj->address()));
415 }
416
Key(HeapObject * obj)417 static void* Key(HeapObject* obj) {
418 return reinterpret_cast<void*>(obj->address());
419 }
420
Value(int v)421 static void* Value(int v) {
422 return reinterpret_cast<void*>(v);
423 }
424
425 DisallowHeapAllocation no_allocation_;
426 HashMap* serialization_map_;
427 DISALLOW_COPY_AND_ASSIGN(SerializationAddressMapper);
428 };
429
430
431 class CodeAddressMap;
432
433 // There can be only one serializer per V8 process.
434 class Serializer : public SerializerDeserializer {
435 public:
436 Serializer(Isolate* isolate, SnapshotByteSink* sink);
437 ~Serializer();
438 void VisitPointers(Object** start, Object** end);
439 // You can call this after serialization to find out how much space was used
440 // in each space.
CurrentAllocationAddress(int space)441 int CurrentAllocationAddress(int space) const {
442 ASSERT(space < kNumberOfSpaces);
443 return fullness_[space];
444 }
445
isolate()446 Isolate* isolate() const { return isolate_; }
447
address_mapper()448 SerializationAddressMapper* address_mapper() { return &address_mapper_; }
449 void PutRoot(int index,
450 HeapObject* object,
451 HowToCode how,
452 WhereToPoint where,
453 int skip);
454
455 protected:
456 static const int kInvalidRootIndex = -1;
457
458 int RootIndex(HeapObject* heap_object, HowToCode from);
root_index_wave_front()459 intptr_t root_index_wave_front() { return root_index_wave_front_; }
set_root_index_wave_front(intptr_t value)460 void set_root_index_wave_front(intptr_t value) {
461 ASSERT(value >= root_index_wave_front_);
462 root_index_wave_front_ = value;
463 }
464
465 class ObjectSerializer : public ObjectVisitor {
466 public:
ObjectSerializer(Serializer * serializer,Object * o,SnapshotByteSink * sink,HowToCode how_to_code,WhereToPoint where_to_point)467 ObjectSerializer(Serializer* serializer,
468 Object* o,
469 SnapshotByteSink* sink,
470 HowToCode how_to_code,
471 WhereToPoint where_to_point)
472 : serializer_(serializer),
473 object_(HeapObject::cast(o)),
474 sink_(sink),
475 reference_representation_(how_to_code + where_to_point),
476 bytes_processed_so_far_(0),
477 code_object_(o->IsCode()),
478 code_has_been_output_(false) { }
479 void Serialize();
480 void VisitPointers(Object** start, Object** end);
481 void VisitEmbeddedPointer(RelocInfo* target);
482 void VisitExternalReference(Address* p);
483 void VisitExternalReference(RelocInfo* rinfo);
484 void VisitCodeTarget(RelocInfo* target);
485 void VisitCodeEntry(Address entry_address);
486 void VisitCell(RelocInfo* rinfo);
487 void VisitRuntimeEntry(RelocInfo* reloc);
488 // Used for seralizing the external strings that hold the natives source.
489 void VisitExternalAsciiString(
490 v8::String::ExternalAsciiStringResource** resource);
491 // We can't serialize a heap with external two byte strings.
VisitExternalTwoByteString(v8::String::ExternalStringResource ** resource)492 void VisitExternalTwoByteString(
493 v8::String::ExternalStringResource** resource) {
494 UNREACHABLE();
495 }
496
497 private:
498 enum ReturnSkip { kCanReturnSkipInsteadOfSkipping, kIgnoringReturn };
499 // This function outputs or skips the raw data between the last pointer and
500 // up to the current position. It optionally can just return the number of
501 // bytes to skip instead of performing a skip instruction, in case the skip
502 // can be merged into the next instruction.
503 int OutputRawData(Address up_to, ReturnSkip return_skip = kIgnoringReturn);
504
505 Serializer* serializer_;
506 HeapObject* object_;
507 SnapshotByteSink* sink_;
508 int reference_representation_;
509 int bytes_processed_so_far_;
510 bool code_object_;
511 bool code_has_been_output_;
512 };
513
514 virtual void SerializeObject(Object* o,
515 HowToCode how_to_code,
516 WhereToPoint where_to_point,
517 int skip) = 0;
518 void SerializeReferenceToPreviousObject(
519 int space,
520 int address,
521 HowToCode how_to_code,
522 WhereToPoint where_to_point,
523 int skip);
524 void InitializeAllocators();
525 // This will return the space for an object.
526 static int SpaceOfObject(HeapObject* object);
527 int Allocate(int space, int size);
EncodeExternalReference(Address addr)528 int EncodeExternalReference(Address addr) {
529 return external_reference_encoder_->Encode(addr);
530 }
531
532 int SpaceAreaSize(int space);
533
534 // Some roots should not be serialized, because their actual value depends on
535 // absolute addresses and they are reset after deserialization, anyway.
536 bool ShouldBeSkipped(Object** current);
537
538 Isolate* isolate_;
539 // Keep track of the fullness of each space in order to generate
540 // relative addresses for back references.
541 int fullness_[LAST_SPACE + 1];
542 SnapshotByteSink* sink_;
543 ExternalReferenceEncoder* external_reference_encoder_;
544
545 SerializationAddressMapper address_mapper_;
546 intptr_t root_index_wave_front_;
547 void Pad();
548
549 friend class ObjectSerializer;
550 friend class Deserializer;
551
552 // We may not need the code address map for logging for every instance
553 // of the serializer. Initialize it on demand.
554 void InitializeCodeAddressMap();
555
556 private:
557 CodeAddressMap* code_address_map_;
558 DISALLOW_COPY_AND_ASSIGN(Serializer);
559 };
560
561
562 class PartialSerializer : public Serializer {
563 public:
PartialSerializer(Isolate * isolate,Serializer * startup_snapshot_serializer,SnapshotByteSink * sink)564 PartialSerializer(Isolate* isolate,
565 Serializer* startup_snapshot_serializer,
566 SnapshotByteSink* sink)
567 : Serializer(isolate, sink),
568 startup_serializer_(startup_snapshot_serializer) {
569 set_root_index_wave_front(Heap::kStrongRootListLength);
570 InitializeCodeAddressMap();
571 }
572
573 // Serialize the objects reachable from a single object pointer.
574 void Serialize(Object** o);
575 virtual void SerializeObject(Object* o,
576 HowToCode how_to_code,
577 WhereToPoint where_to_point,
578 int skip);
579
580 private:
581 int PartialSnapshotCacheIndex(HeapObject* o);
ShouldBeInThePartialSnapshotCache(HeapObject * o)582 bool ShouldBeInThePartialSnapshotCache(HeapObject* o) {
583 // Scripts should be referred only through shared function infos. We can't
584 // allow them to be part of the partial snapshot because they contain a
585 // unique ID, and deserializing several partial snapshots containing script
586 // would cause dupes.
587 ASSERT(!o->IsScript());
588 return o->IsName() || o->IsSharedFunctionInfo() ||
589 o->IsHeapNumber() || o->IsCode() ||
590 o->IsScopeInfo() ||
591 o->map() ==
592 startup_serializer_->isolate()->heap()->fixed_cow_array_map();
593 }
594
595
596 Serializer* startup_serializer_;
597 DISALLOW_COPY_AND_ASSIGN(PartialSerializer);
598 };
599
600
601 class StartupSerializer : public Serializer {
602 public:
StartupSerializer(Isolate * isolate,SnapshotByteSink * sink)603 StartupSerializer(Isolate* isolate, SnapshotByteSink* sink)
604 : Serializer(isolate, sink) {
605 // Clear the cache of objects used by the partial snapshot. After the
606 // strong roots have been serialized we can create a partial snapshot
607 // which will repopulate the cache with objects needed by that partial
608 // snapshot.
609 isolate->set_serialize_partial_snapshot_cache_length(0);
610 InitializeCodeAddressMap();
611 }
612 // Serialize the current state of the heap. The order is:
613 // 1) Strong references.
614 // 2) Partial snapshot cache.
615 // 3) Weak references (e.g. the string table).
616 virtual void SerializeStrongReferences();
617 virtual void SerializeObject(Object* o,
618 HowToCode how_to_code,
619 WhereToPoint where_to_point,
620 int skip);
621 void SerializeWeakReferences();
Serialize()622 void Serialize() {
623 SerializeStrongReferences();
624 SerializeWeakReferences();
625 Pad();
626 }
627 };
628
629
630 } } // namespace v8::internal
631
632 #endif // V8_SERIALIZE_H_
633