// Copyright 2016 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/compiler/memory-optimizer.h"

#include "src/compiler/js-graph.h"
#include "src/compiler/linkage.h"
#include "src/compiler/node-matchers.h"
#include "src/compiler/node-properties.h"
#include "src/compiler/node.h"
#include "src/compiler/simplified-operator.h"

namespace v8 {
namespace internal {
namespace compiler {

MemoryOptimizer::MemoryOptimizer(JSGraph* jsgraph, Zone* zone)
    : jsgraph_(jsgraph),
      empty_state_(AllocationState::Empty(zone)),
      pending_(zone),
      tokens_(zone),
      zone_(zone),
      graph_assembler_(jsgraph, nullptr, nullptr, zone) {}

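// Walks the effect chains starting at the graph's Start node, propagating an
// AllocationState along every effect path and lowering allocation, load and
// store nodes to machine-level operations as it goes.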
void MemoryOptimizer::Optimize() {
  EnqueueUses(graph()->start(), empty_state());
  while (!tokens_.empty()) {
    Token const token = tokens_.front();
    tokens_.pop();
    VisitNode(token.node, token.state);
  }
  DCHECK(pending_.empty());
  DCHECK(tokens_.empty());
}

MemoryOptimizer::AllocationGroup::AllocationGroup(Node* node,
                                                  PretenureFlag pretenure,
                                                  Zone* zone)
    : node_ids_(zone), pretenure_(pretenure), size_(nullptr) {
  node_ids_.insert(node->id());
}

MemoryOptimizer::AllocationGroup::AllocationGroup(Node* node,
                                                  PretenureFlag pretenure,
                                                  Node* size, Zone* zone)
    : node_ids_(zone), pretenure_(pretenure), size_(size) {
  node_ids_.insert(node->id());
}

void MemoryOptimizer::AllocationGroup::Add(Node* node) {
  node_ids_.insert(node->id());
}

bool MemoryOptimizer::AllocationGroup::Contains(Node* node) const {
  return node_ids_.find(node->id()) != node_ids_.end();
}

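// An AllocationState comes in three flavors: empty (no dominating allocation
// group), closed (known group, but no usable top/size, so no folding), and
// open (known group plus the current allocation top and the size reserved so
// far, which allows folding of further allocations).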
MemoryOptimizer::AllocationState::AllocationState()
    : group_(nullptr), size_(std::numeric_limits<int>::max()), top_(nullptr) {}

MemoryOptimizer::AllocationState::AllocationState(AllocationGroup* group)
    : group_(group), size_(std::numeric_limits<int>::max()), top_(nullptr) {}

MemoryOptimizer::AllocationState::AllocationState(AllocationGroup* group,
                                                  int size, Node* top)
    : group_(group), size_(size), top_(top) {}

bool MemoryOptimizer::AllocationState::IsNewSpaceAllocation() const {
  return group() && group()->IsNewSpaceAllocation();
}

void MemoryOptimizer::VisitNode(Node* node, AllocationState const* state) {
  DCHECK(!node->IsDead());
  DCHECK_LT(0, node->op()->EffectInputCount());
  switch (node->opcode()) {
    case IrOpcode::kAllocate:
      return VisitAllocate(node, state);
    case IrOpcode::kCall:
      return VisitCall(node, state);
    case IrOpcode::kLoadElement:
      return VisitLoadElement(node, state);
    case IrOpcode::kLoadField:
      return VisitLoadField(node, state);
    case IrOpcode::kStoreElement:
      return VisitStoreElement(node, state);
    case IrOpcode::kStoreField:
      return VisitStoreField(node, state);
    case IrOpcode::kCheckedLoad:
    case IrOpcode::kCheckedStore:
    case IrOpcode::kDeoptimizeIf:
    case IrOpcode::kDeoptimizeUnless:
    case IrOpcode::kIfException:
    case IrOpcode::kLoad:
    case IrOpcode::kProtectedLoad:
    case IrOpcode::kStore:
    case IrOpcode::kProtectedStore:
    case IrOpcode::kRetain:
    case IrOpcode::kUnsafePointerAdd:
      return VisitOtherEffect(node, state);
    default:
      break;
  }
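  // All remaining nodes must not have effect outputs, i.e. the effect chain
  // ends here and there is nothing left to enqueue along this path.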
  DCHECK_EQ(0, node->op()->EffectOutputCount());
}

#define __ gasm()->

void MemoryOptimizer::VisitAllocate(Node* node, AllocationState const* state) {
  DCHECK_EQ(IrOpcode::kAllocate, node->opcode());
  Node* value;
  Node* size = node->InputAt(0);
  Node* effect = node->InputAt(1);
  Node* control = node->InputAt(2);

  gasm()->Reset(effect, control);

  PretenureFlag pretenure = PretenureFlagOf(node->op());

  // Propagate tenuring from outer allocations to inner allocations, i.e.
  // when we allocate an object in old space and store a newly allocated
  // child object into the pretenured object, then the newly allocated
  // child object also should get pretenured to old space.
  if (pretenure == TENURED) {
    for (Edge const edge : node->use_edges()) {
      Node* const user = edge.from();
      if (user->opcode() == IrOpcode::kStoreField && edge.index() == 0) {
        Node* const child = user->InputAt(1);
        if (child->opcode() == IrOpcode::kAllocate &&
            PretenureFlagOf(child->op()) == NOT_TENURED) {
          NodeProperties::ChangeOp(child, node->op());
          break;
        }
      }
    }
  } else {
    DCHECK_EQ(NOT_TENURED, pretenure);
    for (Edge const edge : node->use_edges()) {
      Node* const user = edge.from();
      if (user->opcode() == IrOpcode::kStoreField && edge.index() == 1) {
        Node* const parent = user->InputAt(0);
        if (parent->opcode() == IrOpcode::kAllocate &&
            PretenureFlagOf(parent->op()) == TENURED) {
          pretenure = TENURED;
          break;
        }
      }
    }
  }

  // Determine the top/limit addresses.
  Node* top_address = __ ExternalConstant(
      pretenure == NOT_TENURED
          ? ExternalReference::new_space_allocation_top_address(isolate())
          : ExternalReference::old_space_allocation_top_address(isolate()));
  Node* limit_address = __ ExternalConstant(
      pretenure == NOT_TENURED
          ? ExternalReference::new_space_allocation_limit_address(isolate())
          : ExternalReference::old_space_allocation_limit_address(isolate()));

  // Check if we can fold this allocation into a previous allocation
  // represented by the incoming {state}.
  Int32Matcher m(size);
  if (m.HasValue() && m.Value() < kMaxRegularHeapObjectSize) {
    int32_t const object_size = m.Value();
    if (state->size() <= kMaxRegularHeapObjectSize - object_size &&
        state->group()->pretenure() == pretenure) {
      // We can fold this Allocate {node} into the allocation {group}
      // represented by the given {state}. Compute the upper bound for
      // the new {state}.
      int32_t const state_size = state->size() + object_size;

      // Update the reservation check to the actual maximum upper bound.
      AllocationGroup* const group = state->group();
      if (OpParameter<int32_t>(group->size()) < state_size) {
        NodeProperties::ChangeOp(group->size(),
                                 common()->Int32Constant(state_size));
      }

      // Update the allocation top with the new object allocation.
      // TODO(bmeurer): Defer writing back top as much as possible.
      Node* top = __ IntAdd(state->top(), __ IntPtrConstant(object_size));
      __ Store(StoreRepresentation(MachineType::PointerRepresentation(),
                                   kNoWriteBarrier),
               top_address, __ IntPtrConstant(0), top);

      // Compute the effective inner allocated address.
      value = __ BitcastWordToTagged(
          __ IntAdd(state->top(), __ IntPtrConstant(kHeapObjectTag)));

      // Extend the allocation {group}.
      group->Add(value);
      state = AllocationState::Open(group, state_size, top, zone());
    } else {
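      // The allocation cannot be folded into the current group (there is no
      // open dominating allocation, the combined size would exceed
      // kMaxRegularHeapObjectSize, or the pretenure flags differ). Start a
      // new group with a bump pointer allocation and a runtime fallback.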
      auto call_runtime = __ MakeDeferredLabel<1>();
      auto done = __ MakeLabel<2>(MachineType::PointerRepresentation());

      // Setup a mutable reservation size node; will be patched as we fold
      // additional allocations into this new group.
      Node* size = __ UniqueInt32Constant(object_size);

      // Load allocation top and limit.
      Node* top =
          __ Load(MachineType::Pointer(), top_address, __ IntPtrConstant(0));
      Node* limit =
          __ Load(MachineType::Pointer(), limit_address, __ IntPtrConstant(0));

      // Check if we need to collect garbage before we can start bump pointer
      // allocation (always done for folded allocations).
      Node* check = __ UintLessThan(
          __ IntAdd(top,
                    machine()->Is64() ? __ ChangeInt32ToInt64(size) : size),
          limit);

      __ GotoUnless(check, &call_runtime);
      __ Goto(&done, top);

      __ Bind(&call_runtime);
      {
        Node* target = pretenure == NOT_TENURED
                           ? __ AllocateInNewSpaceStubConstant()
                           : __ AllocateInOldSpaceStubConstant();
        if (!allocate_operator_.is_set()) {
          CallDescriptor* descriptor =
              Linkage::GetAllocateCallDescriptor(graph()->zone());
          allocate_operator_.set(common()->Call(descriptor));
        }
        Node* vfalse = __ Call(allocate_operator_.get(), target, size);
        vfalse = __ IntSub(vfalse, __ IntPtrConstant(kHeapObjectTag));
        __ Goto(&done, vfalse);
      }

      __ Bind(&done);

      // Compute the new top and write it back.
      top = __ IntAdd(done.PhiAt(0), __ IntPtrConstant(object_size));
      __ Store(StoreRepresentation(MachineType::PointerRepresentation(),
                                   kNoWriteBarrier),
               top_address, __ IntPtrConstant(0), top);

      // Compute the initial object address.
      value = __ BitcastWordToTagged(
          __ IntAdd(done.PhiAt(0), __ IntPtrConstant(kHeapObjectTag)));

      // Start a new allocation group.
      AllocationGroup* group =
          new (zone()) AllocationGroup(value, pretenure, size, zone());
      state = AllocationState::Open(group, object_size, top, zone());
    }
  } else {
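    // The allocation size is not a small compile-time constant, so this
    // allocation cannot participate in allocation folding. Emit a plain
    // bump pointer allocation with a runtime call as fallback.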
    auto call_runtime = __ MakeDeferredLabel<1>();
    auto done = __ MakeLabel<2>(MachineRepresentation::kTaggedPointer);

    // Load allocation top and limit.
    Node* top =
        __ Load(MachineType::Pointer(), top_address, __ IntPtrConstant(0));
    Node* limit =
        __ Load(MachineType::Pointer(), limit_address, __ IntPtrConstant(0));

    // Compute the new top.
    Node* new_top =
        __ IntAdd(top, machine()->Is64() ? __ ChangeInt32ToInt64(size) : size);

    // Check if we can do bump pointer allocation here.
    Node* check = __ UintLessThan(new_top, limit);
    __ GotoUnless(check, &call_runtime);
    __ Store(StoreRepresentation(MachineType::PointerRepresentation(),
                                 kNoWriteBarrier),
             top_address, __ IntPtrConstant(0), new_top);
    __ Goto(&done, __ BitcastWordToTagged(
                       __ IntAdd(top, __ IntPtrConstant(kHeapObjectTag))));

    __ Bind(&call_runtime);
    Node* target = pretenure == NOT_TENURED
                       ? __ AllocateInNewSpaceStubConstant()
                       : __ AllocateInOldSpaceStubConstant();
    if (!allocate_operator_.is_set()) {
      CallDescriptor* descriptor =
          Linkage::GetAllocateCallDescriptor(graph()->zone());
      allocate_operator_.set(common()->Call(descriptor));
    }
    __ Goto(&done, __ Call(allocate_operator_.get(), target, size));

    __ Bind(&done);
    value = done.PhiAt(0);

    // Create an unfoldable allocation group.
    AllocationGroup* group =
        new (zone()) AllocationGroup(value, pretenure, zone());
    state = AllocationState::Closed(group, zone());
  }

  effect = __ ExtractCurrentEffect();
  control = __ ExtractCurrentControl();
  USE(control);  // Floating control, dropped on the floor.

  // Replace all effect uses of {node} with the {effect}, enqueue the
  // effect uses for further processing, and replace all value uses of
  // {node} with the {value}.
  for (Edge edge : node->use_edges()) {
    if (NodeProperties::IsEffectEdge(edge)) {
      EnqueueUse(edge.from(), edge.index(), state);
      edge.UpdateTo(effect);
    } else {
      DCHECK(NodeProperties::IsValueEdge(edge));
      edge.UpdateTo(value);
    }
  }

  // Kill the {node} to make sure we don't leave dangling dead uses.
  node->Kill();
}

#undef __

void MemoryOptimizer::VisitCall(Node* node, AllocationState const* state) {
  DCHECK_EQ(IrOpcode::kCall, node->opcode());
  // If the call can allocate, we start with a fresh state.
  if (!(CallDescriptorOf(node->op())->flags() & CallDescriptor::kNoAllocate)) {
    state = empty_state();
  }
  EnqueueUses(node, state);
}

void MemoryOptimizer::VisitLoadElement(Node* node,
                                       AllocationState const* state) {
  DCHECK_EQ(IrOpcode::kLoadElement, node->opcode());
  ElementAccess const& access = ElementAccessOf(node->op());
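  // Lower to a raw machine Load; ComputeIndex turns the element index into a
  // byte offset that accounts for the header size and the heap object tag.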
  Node* index = node->InputAt(1);
  node->ReplaceInput(1, ComputeIndex(access, index));
  NodeProperties::ChangeOp(node, machine()->Load(access.machine_type));
  EnqueueUses(node, state);
}

void MemoryOptimizer::VisitLoadField(Node* node, AllocationState const* state) {
  DCHECK_EQ(IrOpcode::kLoadField, node->opcode());
  FieldAccess const& access = FieldAccessOf(node->op());
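  // Lower to a raw machine Load with an explicit offset input; subtracting
  // access.tag() compensates for the heap object tag on the base pointer.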
  Node* offset = jsgraph()->IntPtrConstant(access.offset - access.tag());
  node->InsertInput(graph()->zone(), 1, offset);
  NodeProperties::ChangeOp(node, machine()->Load(access.machine_type));
  EnqueueUses(node, state);
}

void MemoryOptimizer::VisitStoreElement(Node* node,
                                        AllocationState const* state) {
  DCHECK_EQ(IrOpcode::kStoreElement, node->opcode());
  ElementAccess const& access = ElementAccessOf(node->op());
  Node* object = node->InputAt(0);
  Node* index = node->InputAt(1);
  WriteBarrierKind write_barrier_kind =
      ComputeWriteBarrierKind(object, state, access.write_barrier_kind);
  node->ReplaceInput(1, ComputeIndex(access, index));
  NodeProperties::ChangeOp(
      node, machine()->Store(StoreRepresentation(
                access.machine_type.representation(), write_barrier_kind)));
  EnqueueUses(node, state);
}

void MemoryOptimizer::VisitStoreField(Node* node,
                                      AllocationState const* state) {
  DCHECK_EQ(IrOpcode::kStoreField, node->opcode());
  FieldAccess const& access = FieldAccessOf(node->op());
  Node* object = node->InputAt(0);
  WriteBarrierKind write_barrier_kind =
      ComputeWriteBarrierKind(object, state, access.write_barrier_kind);
  Node* offset = jsgraph()->IntPtrConstant(access.offset - access.tag());
  node->InsertInput(graph()->zone(), 1, offset);
  NodeProperties::ChangeOp(
      node, machine()->Store(StoreRepresentation(
                access.machine_type.representation(), write_barrier_kind)));
  EnqueueUses(node, state);
}

void MemoryOptimizer::VisitOtherEffect(Node* node,
                                       AllocationState const* state) {
  EnqueueUses(node, state);
}

Node* MemoryOptimizer::ComputeIndex(ElementAccess const& access, Node* key) {
  Node* index;
  if (machine()->Is64()) {
    // On 64-bit platforms, we need to feed a Word64 index to the Load and
    // Store operators. Since LoadElement or StoreElement don't do any bounds
    // checking themselves, we can be sure that the {key} was already checked
    // and is in valid range, so we can do the further address computation on
    // Word64 below, which ideally allows us to fuse the address computation
    // with the actual memory access operation on Intel platforms.
    index = graph()->NewNode(machine()->ChangeUint32ToUint64(), key);
  } else {
    index = key;
  }
  int const element_size_shift =
      ElementSizeLog2Of(access.machine_type.representation());
  if (element_size_shift) {
    index = graph()->NewNode(machine()->WordShl(), index,
                             jsgraph()->IntPtrConstant(element_size_shift));
  }
  int const fixed_offset = access.header_size - access.tag();
  if (fixed_offset) {
    index = graph()->NewNode(machine()->IntAdd(), index,
                             jsgraph()->IntPtrConstant(fixed_offset));
  }
  return index;
}

WriteBarrierKind MemoryOptimizer::ComputeWriteBarrierKind(
    Node* object, AllocationState const* state,
    WriteBarrierKind write_barrier_kind) {
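  // If the {object} was freshly allocated in new space as part of the current
  // allocation group, no GC can have happened since the allocation along this
  // effect path, so the store does not need a write barrier.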
  if (state->IsNewSpaceAllocation() && state->group()->Contains(object)) {
    write_barrier_kind = kNoWriteBarrier;
  }
  return write_barrier_kind;
}

MemoryOptimizer::AllocationState const* MemoryOptimizer::MergeStates(
    AllocationStates const& states) {
  // Check if all states are the same; or at least if all allocation
  // states belong to the same allocation group.
  AllocationState const* state = states.front();
  AllocationGroup* group = state->group();
  for (size_t i = 1; i < states.size(); ++i) {
    if (states[i] != state) state = nullptr;
    if (states[i]->group() != group) group = nullptr;
  }
  if (state == nullptr) {
    if (group != nullptr) {
      // We cannot fold any more allocations into this group, but we can still
      // eliminate write barriers on stores to this group.
      // TODO(bmeurer): We could potentially just create a Phi here to merge
      // the various tops; but we need to pay special attention not to create
      // an unschedulable graph.
      state = AllocationState::Closed(group, zone());
    } else {
      // The states are from different allocation groups.
      state = empty_state();
    }
  }
  return state;
}

void MemoryOptimizer::EnqueueMerge(Node* node, int index,
                                   AllocationState const* state) {
  DCHECK_EQ(IrOpcode::kEffectPhi, node->opcode());
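  // The last input of an EffectPhi is its associated control node (a Merge
  // or a Loop); the remaining inputs are the incoming effect values.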
  int const input_count = node->InputCount() - 1;
  DCHECK_LT(0, input_count);
  Node* const control = node->InputAt(input_count);
  if (control->opcode() == IrOpcode::kLoop) {
    // For loops we always start with an empty state at the beginning.
    if (index == 0) EnqueueUses(node, empty_state());
  } else {
    DCHECK_EQ(IrOpcode::kMerge, control->opcode());
    // Check if we already know about this pending merge.
    NodeId const id = node->id();
    auto it = pending_.find(id);
    if (it == pending_.end()) {
      // Insert a new pending merge.
      it = pending_.insert(std::make_pair(id, AllocationStates(zone()))).first;
    }
    // Add the next input state.
    it->second.push_back(state);
    // Check if states for all inputs are available by now.
    if (it->second.size() == static_cast<size_t>(input_count)) {
      // All inputs to this effect merge are done, merge the states given all
      // input constraints, drop the pending merge and enqueue uses of the
      // EffectPhi {node}.
      state = MergeStates(it->second);
      EnqueueUses(node, state);
      pending_.erase(it);
    }
  }
}

void MemoryOptimizer::EnqueueUses(Node* node, AllocationState const* state) {
  for (Edge const edge : node->use_edges()) {
    if (NodeProperties::IsEffectEdge(edge)) {
      EnqueueUse(edge.from(), edge.index(), state);
    }
  }
}

void MemoryOptimizer::EnqueueUse(Node* node, int index,
                                 AllocationState const* state) {
  if (node->opcode() == IrOpcode::kEffectPhi) {
    // An EffectPhi represents a merge of different effect chains, which
    // needs special handling depending on whether the merge is part of a
    // loop or just a normal control join.
    EnqueueMerge(node, index, state);
  } else {
    Token token = {node, state};
    tokens_.push(token);
  }
}

Graph* MemoryOptimizer::graph() const { return jsgraph()->graph(); }

Isolate* MemoryOptimizer::isolate() const { return jsgraph()->isolate(); }

CommonOperatorBuilder* MemoryOptimizer::common() const {
  return jsgraph()->common();
}

MachineOperatorBuilder* MemoryOptimizer::machine() const {
  return jsgraph()->machine();
}

}  // namespace compiler
}  // namespace internal
}  // namespace v8