// Copyright 2016 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/compiler/memory-optimizer.h"

#include "src/compiler/js-graph.h"
#include "src/compiler/linkage.h"
#include "src/compiler/node-matchers.h"
#include "src/compiler/node-properties.h"
#include "src/compiler/node.h"
#include "src/compiler/simplified-operator.h"

namespace v8 {
namespace internal {
namespace compiler {

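// The MemoryOptimizer lowers the simplified memory nodes (Allocate,
// LoadField/StoreField, LoadElement/StoreElement) to machine-level
// operators. Along the way it folds allocations into groups served by a
// single reservation and drops write barriers for stores into objects that
// are known to live in new space.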
MemoryOptimizer::MemoryOptimizer(JSGraph* jsgraph, Zone* zone)
    : jsgraph_(jsgraph),
      empty_state_(AllocationState::Empty(zone)),
      pending_(zone),
      tokens_(zone),
      zone_(zone),
      graph_assembler_(jsgraph, nullptr, nullptr, zone) {}

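// Optimize() performs a worklist-driven walk over the effect chains,
// starting from the graph's start node. Each queued token pairs a node with
// the AllocationState that holds on entry to that node.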
void MemoryOptimizer::Optimize() {
  EnqueueUses(graph()->start(), empty_state());
  while (!tokens_.empty()) {
    Token const token = tokens_.front();
    tokens_.pop();
    VisitNode(token.node, token.state);
  }
  DCHECK(pending_.empty());
  DCHECK(tokens_.empty());
}

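// An AllocationGroup collects the ids of all allocations that have been
// folded into a single reservation. The {size} node, when present, is the
// mutable Int32Constant holding the reservation size; it is patched as
// further allocations are folded into the group.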
MemoryOptimizer::AllocationGroup::AllocationGroup(Node* node,
                                                  PretenureFlag pretenure,
                                                  Zone* zone)
    : node_ids_(zone), pretenure_(pretenure), size_(nullptr) {
  node_ids_.insert(node->id());
}

MemoryOptimizer::AllocationGroup::AllocationGroup(Node* node,
                                                  PretenureFlag pretenure,
                                                  Node* size, Zone* zone)
    : node_ids_(zone), pretenure_(pretenure), size_(size) {
  node_ids_.insert(node->id());
}

void MemoryOptimizer::AllocationGroup::Add(Node* node) {
  node_ids_.insert(node->id());
}

bool MemoryOptimizer::AllocationGroup::Contains(Node* node) const {
  return node_ids_.find(node->id()) != node_ids_.end();
}

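// An AllocationState captures what is known about allocation at a program
// point: the current group (if any), an upper bound on the group's size so
// far, and the node holding the current allocation top. The default state
// knows nothing; a state without a top describes a closed group into which
// no further allocations can be folded.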
MemoryOptimizer::AllocationState::AllocationState()
    : group_(nullptr), size_(std::numeric_limits<int>::max()), top_(nullptr) {}

MemoryOptimizer::AllocationState::AllocationState(AllocationGroup* group)
    : group_(group), size_(std::numeric_limits<int>::max()), top_(nullptr) {}

MemoryOptimizer::AllocationState::AllocationState(AllocationGroup* group,
                                                  int size, Node* top)
    : group_(group), size_(size), top_(top) {}

bool MemoryOptimizer::AllocationState::IsNewSpaceAllocation() const {
  return group() && group()->IsNewSpaceAllocation();
}

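// VisitNode dispatches on the opcode: allocations and simplified memory
// accesses are lowered below, calls may invalidate the allocation state,
// and all other effectful nodes simply pass the incoming state on to their
// effect uses.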
void MemoryOptimizer::VisitNode(Node* node, AllocationState const* state) {
  DCHECK(!node->IsDead());
  DCHECK_LT(0, node->op()->EffectInputCount());
  switch (node->opcode()) {
    case IrOpcode::kAllocate:
      return VisitAllocate(node, state);
    case IrOpcode::kCall:
      return VisitCall(node, state);
    case IrOpcode::kLoadElement:
      return VisitLoadElement(node, state);
    case IrOpcode::kLoadField:
      return VisitLoadField(node, state);
    case IrOpcode::kStoreElement:
      return VisitStoreElement(node, state);
    case IrOpcode::kStoreField:
      return VisitStoreField(node, state);
    case IrOpcode::kCheckedLoad:
    case IrOpcode::kCheckedStore:
    case IrOpcode::kDeoptimizeIf:
    case IrOpcode::kDeoptimizeUnless:
    case IrOpcode::kIfException:
    case IrOpcode::kLoad:
    case IrOpcode::kProtectedLoad:
    case IrOpcode::kStore:
    case IrOpcode::kProtectedStore:
    case IrOpcode::kRetain:
    case IrOpcode::kUnsafePointerAdd:
      return VisitOtherEffect(node, state);
    default:
      break;
  }
  DCHECK_EQ(0, node->op()->EffectOutputCount());
}

#define __ gasm()->

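// VisitAllocate lowers an Allocate node to a bump-pointer allocation
// against the top/limit addresses of the target space, with a runtime stub
// call as fallback. If the size is a small constant and the incoming state
// has room, the allocation is folded into the existing group instead of
// issuing a new limit check.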
void MemoryOptimizer::VisitAllocate(Node* node, AllocationState const* state) {
  DCHECK_EQ(IrOpcode::kAllocate, node->opcode());
  Node* value;
  Node* size = node->InputAt(0);
  Node* effect = node->InputAt(1);
  Node* control = node->InputAt(2);

  gasm()->Reset(effect, control);

  PretenureFlag pretenure = PretenureFlagOf(node->op());

  // Propagate tenuring from outer allocations to inner allocations, i.e.
  // when we allocate an object in old space and store a newly allocated
  // child object into the pretenured object, then the newly allocated
  // child object also should get pretenured to old space.
  if (pretenure == TENURED) {
    for (Edge const edge : node->use_edges()) {
      Node* const user = edge.from();
      if (user->opcode() == IrOpcode::kStoreField && edge.index() == 0) {
        Node* const child = user->InputAt(1);
        if (child->opcode() == IrOpcode::kAllocate &&
            PretenureFlagOf(child->op()) == NOT_TENURED) {
          NodeProperties::ChangeOp(child, node->op());
          break;
        }
      }
    }
  } else {
    DCHECK_EQ(NOT_TENURED, pretenure);
    for (Edge const edge : node->use_edges()) {
      Node* const user = edge.from();
      if (user->opcode() == IrOpcode::kStoreField && edge.index() == 1) {
        Node* const parent = user->InputAt(0);
        if (parent->opcode() == IrOpcode::kAllocate &&
            PretenureFlagOf(parent->op()) == TENURED) {
          pretenure = TENURED;
          break;
        }
      }
    }
  }

  // Determine the top/limit addresses.
  Node* top_address = __ ExternalConstant(
      pretenure == NOT_TENURED
          ? ExternalReference::new_space_allocation_top_address(isolate())
          : ExternalReference::old_space_allocation_top_address(isolate()));
  Node* limit_address = __ ExternalConstant(
      pretenure == NOT_TENURED
          ? ExternalReference::new_space_allocation_limit_address(isolate())
          : ExternalReference::old_space_allocation_limit_address(isolate()));

  // Check if we can fold this allocation into a previous allocation
  // represented by the incoming {state}.
  Int32Matcher m(size);
  if (m.HasValue() && m.Value() < kMaxRegularHeapObjectSize) {
    int32_t const object_size = m.Value();
    if (state->size() <= kMaxRegularHeapObjectSize - object_size &&
        state->group()->pretenure() == pretenure) {
      // We can fold this Allocate {node} into the allocation {group}
      // represented by the given {state}. Compute the upper bound for
      // the new {state}.
      int32_t const state_size = state->size() + object_size;

      // Update the reservation check to the actual maximum upper bound.
      AllocationGroup* const group = state->group();
      if (OpParameter<int32_t>(group->size()) < state_size) {
        NodeProperties::ChangeOp(group->size(),
                                 common()->Int32Constant(state_size));
      }

      // Update the allocation top with the new object allocation.
      // TODO(bmeurer): Defer writing back top as much as possible.
      Node* top = __ IntAdd(state->top(), __ IntPtrConstant(object_size));
      __ Store(StoreRepresentation(MachineType::PointerRepresentation(),
                                   kNoWriteBarrier),
               top_address, __ IntPtrConstant(0), top);

      // Compute the effective inner allocated address.
      value = __ BitcastWordToTagged(
          __ IntAdd(state->top(), __ IntPtrConstant(kHeapObjectTag)));

      // Extend the allocation {group}.
      group->Add(value);
      state = AllocationState::Open(group, state_size, top, zone());
    } else {
      auto call_runtime = __ MakeDeferredLabel<1>();
      auto done = __ MakeLabel<2>(MachineType::PointerRepresentation());

      // Setup a mutable reservation size node; will be patched as we fold
      // additional allocations into this new group.
      Node* size = __ UniqueInt32Constant(object_size);

      // Load allocation top and limit.
      Node* top =
          __ Load(MachineType::Pointer(), top_address, __ IntPtrConstant(0));
      Node* limit =
          __ Load(MachineType::Pointer(), limit_address, __ IntPtrConstant(0));

      // Check if we need to collect garbage before we can start bump pointer
      // allocation (always done for folded allocations).
      Node* check = __ UintLessThan(
          __ IntAdd(top,
                    machine()->Is64() ? __ ChangeInt32ToInt64(size) : size),
          limit);

      __ GotoUnless(check, &call_runtime);
      __ Goto(&done, top);

      __ Bind(&call_runtime);
      {
        Node* target = pretenure == NOT_TENURED
                           ? __ AllocateInNewSpaceStubConstant()
                           : __ AllocateInOldSpaceStubConstant();
        if (!allocate_operator_.is_set()) {
          CallDescriptor* descriptor =
              Linkage::GetAllocateCallDescriptor(graph()->zone());
          allocate_operator_.set(common()->Call(descriptor));
        }
        Node* vfalse = __ Call(allocate_operator_.get(), target, size);
        vfalse = __ IntSub(vfalse, __ IntPtrConstant(kHeapObjectTag));
        __ Goto(&done, vfalse);
      }

      __ Bind(&done);

      // Compute the new top and write it back.
      top = __ IntAdd(done.PhiAt(0), __ IntPtrConstant(object_size));
      __ Store(StoreRepresentation(MachineType::PointerRepresentation(),
                                   kNoWriteBarrier),
               top_address, __ IntPtrConstant(0), top);

      // Compute the initial object address.
      value = __ BitcastWordToTagged(
          __ IntAdd(done.PhiAt(0), __ IntPtrConstant(kHeapObjectTag)));

      // Start a new allocation group.
      AllocationGroup* group =
          new (zone()) AllocationGroup(value, pretenure, size, zone());
      state = AllocationState::Open(group, object_size, top, zone());
    }
  } else {
    auto call_runtime = __ MakeDeferredLabel<1>();
    auto done = __ MakeLabel<2>(MachineRepresentation::kTaggedPointer);

    // Load allocation top and limit.
    Node* top =
        __ Load(MachineType::Pointer(), top_address, __ IntPtrConstant(0));
    Node* limit =
        __ Load(MachineType::Pointer(), limit_address, __ IntPtrConstant(0));

    // Compute the new top.
    Node* new_top =
        __ IntAdd(top, machine()->Is64() ? __ ChangeInt32ToInt64(size) : size);

    // Check if we can do bump pointer allocation here.
    Node* check = __ UintLessThan(new_top, limit);
    __ GotoUnless(check, &call_runtime);
    __ Store(StoreRepresentation(MachineType::PointerRepresentation(),
                                 kNoWriteBarrier),
             top_address, __ IntPtrConstant(0), new_top);
    __ Goto(&done, __ BitcastWordToTagged(
                       __ IntAdd(top, __ IntPtrConstant(kHeapObjectTag))));

    __ Bind(&call_runtime);
    Node* target = pretenure == NOT_TENURED
                       ? __ AllocateInNewSpaceStubConstant()
                       : __ AllocateInOldSpaceStubConstant();
    if (!allocate_operator_.is_set()) {
      CallDescriptor* descriptor =
          Linkage::GetAllocateCallDescriptor(graph()->zone());
      allocate_operator_.set(common()->Call(descriptor));
    }
    __ Goto(&done, __ Call(allocate_operator_.get(), target, size));

    __ Bind(&done);
    value = done.PhiAt(0);

    // Create an unfoldable allocation group.
    AllocationGroup* group =
        new (zone()) AllocationGroup(value, pretenure, zone());
    state = AllocationState::Closed(group, zone());
  }

  effect = __ ExtractCurrentEffect();
  control = __ ExtractCurrentControl();
  USE(control);  // Floating control, dropped on the floor.

  // Replace all effect uses of {node} with the {effect}, enqueue the
  // effect uses for further processing, and replace all value uses of
  // {node} with the {value}.
  for (Edge edge : node->use_edges()) {
    if (NodeProperties::IsEffectEdge(edge)) {
      EnqueueUse(edge.from(), edge.index(), state);
      edge.UpdateTo(effect);
    } else {
      DCHECK(NodeProperties::IsValueEdge(edge));
      edge.UpdateTo(value);
    }
  }

  // Kill the {node} to make sure we don't leave dangling dead uses.
  node->Kill();
}

#undef __

void MemoryOptimizer::VisitCall(Node* node, AllocationState const* state) {
  DCHECK_EQ(IrOpcode::kCall, node->opcode());
  // If the call can allocate, we start with a fresh state.
  if (!(CallDescriptorOf(node->op())->flags() & CallDescriptor::kNoAllocate)) {
    state = empty_state();
  }
  EnqueueUses(node, state);
}

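// The lowering visitors below rewrite simplified field/element accesses
// into machine-level Load/Store operators: the element index or field
// offset becomes an explicit (untagged) byte offset, and for stores the
// required write barrier kind is computed from the allocation state.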
void MemoryOptimizer::VisitLoadElement(Node* node,
                                       AllocationState const* state) {
  DCHECK_EQ(IrOpcode::kLoadElement, node->opcode());
  ElementAccess const& access = ElementAccessOf(node->op());
  Node* index = node->InputAt(1);
  node->ReplaceInput(1, ComputeIndex(access, index));
  NodeProperties::ChangeOp(node, machine()->Load(access.machine_type));
  EnqueueUses(node, state);
}

void MemoryOptimizer::VisitLoadField(Node* node, AllocationState const* state) {
  DCHECK_EQ(IrOpcode::kLoadField, node->opcode());
  FieldAccess const& access = FieldAccessOf(node->op());
  Node* offset = jsgraph()->IntPtrConstant(access.offset - access.tag());
  node->InsertInput(graph()->zone(), 1, offset);
  NodeProperties::ChangeOp(node, machine()->Load(access.machine_type));
  EnqueueUses(node, state);
}

void MemoryOptimizer::VisitStoreElement(Node* node,
                                        AllocationState const* state) {
  DCHECK_EQ(IrOpcode::kStoreElement, node->opcode());
  ElementAccess const& access = ElementAccessOf(node->op());
  Node* object = node->InputAt(0);
  Node* index = node->InputAt(1);
  WriteBarrierKind write_barrier_kind =
      ComputeWriteBarrierKind(object, state, access.write_barrier_kind);
  node->ReplaceInput(1, ComputeIndex(access, index));
  NodeProperties::ChangeOp(
      node, machine()->Store(StoreRepresentation(
                access.machine_type.representation(), write_barrier_kind)));
  EnqueueUses(node, state);
}

void MemoryOptimizer::VisitStoreField(Node* node,
                                      AllocationState const* state) {
  DCHECK_EQ(IrOpcode::kStoreField, node->opcode());
  FieldAccess const& access = FieldAccessOf(node->op());
  Node* object = node->InputAt(0);
  WriteBarrierKind write_barrier_kind =
      ComputeWriteBarrierKind(object, state, access.write_barrier_kind);
  Node* offset = jsgraph()->IntPtrConstant(access.offset - access.tag());
  node->InsertInput(graph()->zone(), 1, offset);
  NodeProperties::ChangeOp(
      node, machine()->Store(StoreRepresentation(
                access.machine_type.representation(), write_barrier_kind)));
  EnqueueUses(node, state);
}

void MemoryOptimizer::VisitOtherEffect(Node* node,
                                       AllocationState const* state) {
  EnqueueUses(node, state);
}

Node* MemoryOptimizer::ComputeIndex(ElementAccess const& access, Node* key) {
  Node* index;
  if (machine()->Is64()) {
    // On 64-bit platforms, we need to feed a Word64 index to the Load and
    // Store operators. Since LoadElement or StoreElement don't do any bounds
    // checking themselves, we can be sure that the {key} was already checked
    // and is in valid range, so we can do the further address computation on
    // Word64 below, which ideally allows us to fuse the address computation
    // with the actual memory access operation on Intel platforms.
    index = graph()->NewNode(machine()->ChangeUint32ToUint64(), key);
  } else {
    index = key;
  }
  int const element_size_shift =
      ElementSizeLog2Of(access.machine_type.representation());
  if (element_size_shift) {
    index = graph()->NewNode(machine()->WordShl(), index,
                             jsgraph()->IntPtrConstant(element_size_shift));
  }
  int const fixed_offset = access.header_size - access.tag();
  if (fixed_offset) {
    index = graph()->NewNode(machine()->IntAdd(), index,
                             jsgraph()->IntPtrConstant(fixed_offset));
  }
  return index;
}

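// A store needs no write barrier if the object being stored into is known
// to belong to the current new-space allocation group: it was allocated in
// new space and no allocating operation has intervened, so the store cannot
// introduce an old-to-new pointer that the barrier would have to record.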
WriteBarrierKind MemoryOptimizer::ComputeWriteBarrierKind(
    Node* object, AllocationState const* state,
    WriteBarrierKind write_barrier_kind) {
  if (state->IsNewSpaceAllocation() && state->group()->Contains(object)) {
    write_barrier_kind = kNoWriteBarrier;
  }
  return write_barrier_kind;
}

MemoryOptimizer::AllocationState const* MemoryOptimizer::MergeStates(
    AllocationStates const& states) {
  // Check if all states are the same; or at least if all allocation
  // states belong to the same allocation group.
  AllocationState const* state = states.front();
  AllocationGroup* group = state->group();
  for (size_t i = 1; i < states.size(); ++i) {
    if (states[i] != state) state = nullptr;
    if (states[i]->group() != group) group = nullptr;
  }
  if (state == nullptr) {
    if (group != nullptr) {
      // We cannot fold any more allocations into this group, but we can still
      // eliminate write barriers on stores to this group.
      // TODO(bmeurer): We could potentially just create a Phi here to merge
      // the various tops; but we need to pay special attention not to create
      // an unschedulable graph.
      state = AllocationState::Closed(group, zone());
    } else {
      // The states are from different allocation groups.
      state = empty_state();
    }
  }
  return state;
}

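// EnqueueMerge handles EffectPhi nodes. For loops we conservatively restart
// from the empty state when the first (non-backedge) input is reached; for
// ordinary merges the per-input states are buffered in {pending_} until all
// inputs have been seen, then combined via MergeStates and the uses of the
// EffectPhi are enqueued with the result.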
void MemoryOptimizer::EnqueueMerge(Node* node, int index,
                                   AllocationState const* state) {
  DCHECK_EQ(IrOpcode::kEffectPhi, node->opcode());
  int const input_count = node->InputCount() - 1;
  DCHECK_LT(0, input_count);
  Node* const control = node->InputAt(input_count);
  if (control->opcode() == IrOpcode::kLoop) {
    // For loops we always start with an empty state at the beginning.
    if (index == 0) EnqueueUses(node, empty_state());
  } else {
    DCHECK_EQ(IrOpcode::kMerge, control->opcode());
    // Check if we already know about this pending merge.
    NodeId const id = node->id();
    auto it = pending_.find(id);
    if (it == pending_.end()) {
      // Insert a new pending merge.
      it = pending_.insert(std::make_pair(id, AllocationStates(zone()))).first;
    }
    // Add the next input state.
    it->second.push_back(state);
    // Check if states for all inputs are available by now.
    if (it->second.size() == static_cast<size_t>(input_count)) {
      // All inputs to this effect merge are done, merge the states given all
      // input constraints, drop the pending merge and enqueue uses of the
      // EffectPhi {node}.
      state = MergeStates(it->second);
      EnqueueUses(node, state);
      pending_.erase(it);
    }
  }
}

void MemoryOptimizer::EnqueueUses(Node* node, AllocationState const* state) {
  for (Edge const edge : node->use_edges()) {
    if (NodeProperties::IsEffectEdge(edge)) {
      EnqueueUse(edge.from(), edge.index(), state);
    }
  }
}

void MemoryOptimizer::EnqueueUse(Node* node, int index,
                                 AllocationState const* state) {
  if (node->opcode() == IrOpcode::kEffectPhi) {
    // An EffectPhi represents a merge of different effect chains, which
    // needs special handling depending on whether the merge is part of a
    // loop or just a normal control join.
    EnqueueMerge(node, index, state);
  } else {
    Token token = {node, state};
    tokens_.push(token);
  }
}

Graph* MemoryOptimizer::graph() const { return jsgraph()->graph(); }

Isolate* MemoryOptimizer::isolate() const { return jsgraph()->isolate(); }

CommonOperatorBuilder* MemoryOptimizer::common() const {
  return jsgraph()->common();
}

MachineOperatorBuilder* MemoryOptimizer::machine() const {
  return jsgraph()->machine();
}

}  // namespace compiler
}  // namespace internal
}  // namespace v8