// Copyright 2015 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/compiler/instruction-scheduler.h"

#include "src/base/adapters.h"
#include "src/base/utils/random-number-generator.h"
#include "src/isolate.h"

namespace v8 {
namespace internal {
namespace compiler {

void InstructionScheduler::SchedulingQueueBase::AddNode(
    ScheduleGraphNode* node) {
  // We keep the ready list sorted by total latency so that we can quickly find
  // the next best candidate to schedule.
  auto it = nodes_.begin();
  while ((it != nodes_.end()) &&
         ((*it)->total_latency() >= node->total_latency())) {
    ++it;
  }
  nodes_.insert(it, node);
}


InstructionScheduler::ScheduleGraphNode*
InstructionScheduler::CriticalPathFirstQueue::PopBestCandidate(int cycle) {
  DCHECK(!IsEmpty());
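  // The ready list is kept sorted by decreasing total latency (see AddNode),
  // so the first node whose operands are available is the best candidate on
  // the critical path.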
  auto candidate = nodes_.end();
  for (auto iterator = nodes_.begin(); iterator != nodes_.end(); ++iterator) {
    // We only consider instructions that have all their operands ready.
    if (cycle >= (*iterator)->start_cycle()) {
      candidate = iterator;
      break;
    }
  }

  if (candidate != nodes_.end()) {
    ScheduleGraphNode* result = *candidate;
    nodes_.erase(candidate);
    return result;
  }

  return nullptr;
}

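// Variant used when FLAG_turbo_stress_instruction_scheduling is enabled (see
// EndBlock): the next instruction is picked at random from the ready list
// rather than by critical path.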
InstructionScheduler::ScheduleGraphNode*
InstructionScheduler::StressSchedulerQueue::PopBestCandidate(int cycle) {
  DCHECK(!IsEmpty());
  // Choose a random element from the ready list.
  auto candidate = nodes_.begin();
  std::advance(candidate, isolate()->random_number_generator()->NextInt(
                              static_cast<int>(nodes_.size())));
  ScheduleGraphNode* result = *candidate;
  nodes_.erase(candidate);
  return result;
}


InstructionScheduler::ScheduleGraphNode::ScheduleGraphNode(Zone* zone,
                                                           Instruction* instr)
    : instr_(instr),
      successors_(zone),
      unscheduled_predecessors_count_(0),
      latency_(GetInstructionLatency(instr)),
      total_latency_(-1),
      start_cycle_(-1) {
}


void InstructionScheduler::ScheduleGraphNode::AddSuccessor(
    ScheduleGraphNode* node) {
  successors_.push_back(node);
  node->unscheduled_predecessors_count_++;
}

InstructionScheduler::InstructionScheduler(Zone* zone,
                                           InstructionSequence* sequence)
    : zone_(zone),
      sequence_(sequence),
      graph_(zone),
      last_side_effect_instr_(nullptr),
      pending_loads_(zone),
      last_live_in_reg_marker_(nullptr),
      last_deopt_or_trap_(nullptr),
      operands_map_(zone) {}

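// The scheduler is driven one basic block at a time: StartBlock() checks that
// the per-block state is clean, AddInstruction()/AddTerminator() build the
// dependency graph, and EndBlock() schedules the graph, emits the (possibly
// reordered) instructions into the sequence and resets the state.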
void InstructionScheduler::StartBlock(RpoNumber rpo) {
  DCHECK(graph_.empty());
  DCHECK_NULL(last_side_effect_instr_);
  DCHECK(pending_loads_.empty());
  DCHECK_NULL(last_live_in_reg_marker_);
  DCHECK_NULL(last_deopt_or_trap_);
  DCHECK(operands_map_.empty());
  sequence()->StartBlock(rpo);
}


void InstructionScheduler::EndBlock(RpoNumber rpo) {
  if (FLAG_turbo_stress_instruction_scheduling) {
    ScheduleBlock<StressSchedulerQueue>();
  } else {
    ScheduleBlock<CriticalPathFirstQueue>();
  }
  sequence()->EndBlock(rpo);
  graph_.clear();
  last_side_effect_instr_ = nullptr;
  pending_loads_.clear();
  last_live_in_reg_marker_ = nullptr;
  last_deopt_or_trap_ = nullptr;
  operands_map_.clear();
}

void InstructionScheduler::AddTerminator(Instruction* instr) {
  ScheduleGraphNode* new_node = new (zone()) ScheduleGraphNode(zone(), instr);
  // Make sure that basic block terminators are not moved by adding them as a
  // successor of every other instruction already in the graph.
  for (ScheduleGraphNode* node : graph_) {
    node->AddSuccessor(new_node);
  }
  graph_.push_back(new_node);
}

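// Builds a graph node for |instr| and adds the dependency edges that keep it
// ordered with respect to side effects, loads, deoptimization or trap points,
// and the instructions that define its input operands.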
void InstructionScheduler::AddInstruction(Instruction* instr) {
  ScheduleGraphNode* new_node = new (zone()) ScheduleGraphNode(zone(), instr);

  // We should not have branches in the middle of a block.
  DCHECK_NE(instr->flags_mode(), kFlags_branch);
  DCHECK_NE(instr->flags_mode(), kFlags_branch_and_poison);

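  // Fixed register parameters act as live-in markers: they are chained
  // together, and every later instruction depends on the last marker, so the
  // markers cannot sink below the instructions that follow them.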
  if (IsFixedRegisterParameter(instr)) {
    if (last_live_in_reg_marker_ != nullptr) {
      last_live_in_reg_marker_->AddSuccessor(new_node);
    }
    last_live_in_reg_marker_ = new_node;
  } else {
    if (last_live_in_reg_marker_ != nullptr) {
      last_live_in_reg_marker_->AddSuccessor(new_node);
    }

    // Make sure that instructions are not scheduled before the last
    // deoptimization or trap point when they depend on it.
    if ((last_deopt_or_trap_ != nullptr) && DependsOnDeoptOrTrap(instr)) {
      last_deopt_or_trap_->AddSuccessor(new_node);
    }

    // Instructions with side effects and memory operations can't be
    // reordered with respect to each other.
    if (HasSideEffect(instr)) {
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      for (ScheduleGraphNode* load : pending_loads_) {
        load->AddSuccessor(new_node);
      }
      pending_loads_.clear();
      last_side_effect_instr_ = new_node;
    } else if (IsLoadOperation(instr)) {
      // Loads can't be reordered with side-effecting instructions, but
      // independent loads can be reordered with respect to each other.
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      pending_loads_.push_back(new_node);
    } else if (instr->IsDeoptimizeCall() || instr->IsTrap()) {
      // Ensure that deopts or traps are not reordered with respect to
      // side-effect instructions.
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      last_deopt_or_trap_ = new_node;
    }

    // Look for operand dependencies.
    for (size_t i = 0; i < instr->InputCount(); ++i) {
      const InstructionOperand* input = instr->InputAt(i);
      if (input->IsUnallocated()) {
        int32_t vreg = UnallocatedOperand::cast(input)->virtual_register();
        auto it = operands_map_.find(vreg);
        if (it != operands_map_.end()) {
          it->second->AddSuccessor(new_node);
        }
      }
    }

    // Record the virtual registers defined by this instruction.
    for (size_t i = 0; i < instr->OutputCount(); ++i) {
      const InstructionOperand* output = instr->OutputAt(i);
      if (output->IsUnallocated()) {
        operands_map_[UnallocatedOperand::cast(output)->virtual_register()] =
            new_node;
      } else if (output->IsConstant()) {
        operands_map_[ConstantOperand::cast(output)->virtual_register()] =
            new_node;
      }
    }
  }

  graph_.push_back(new_node);
}


template <typename QueueType>
void InstructionScheduler::ScheduleBlock() {
  QueueType ready_list(this);

  // Compute total latencies so that we can schedule the critical path first.
  ComputeTotalLatencies();

  // Add nodes which don't have dependencies to the ready list.
  for (ScheduleGraphNode* node : graph_) {
    if (!node->HasUnscheduledPredecessor()) {
      ready_list.AddNode(node);
    }
  }

  // Go through the ready list and schedule the instructions.
  int cycle = 0;
  while (!ready_list.IsEmpty()) {
    ScheduleGraphNode* candidate = ready_list.PopBestCandidate(cycle);

    if (candidate != nullptr) {
      sequence()->AddInstruction(candidate->instruction());

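      // Update the successors: each one becomes ready only once all of its
      // predecessors have been scheduled, and may not start before their
      // results are available (issue cycle plus latency).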
      for (ScheduleGraphNode* successor : candidate->successors()) {
        successor->DropUnscheduledPredecessor();
        successor->set_start_cycle(
            std::max(successor->start_cycle(),
                     cycle + candidate->latency()));

        if (!successor->HasUnscheduledPredecessor()) {
          ready_list.AddNode(successor);
        }
      }
    }

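    // At most one instruction is emitted per cycle; the cycle counter still
    // advances when nothing is ready, so nodes waiting on latency eventually
    // become schedulable.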
    cycle++;
  }
}

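// Classifies |instr| for scheduling purposes: kNoOpcodeFlags, kIsLoadOperation
// or kHasSideEffect, depending on its opcode.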
int InstructionScheduler::GetInstructionFlags(const Instruction* instr) const {
  switch (instr->arch_opcode()) {
    case kArchNop:
    case kArchFramePointer:
    case kArchParentFramePointer:
    case kArchStackSlot:  // Despite its name this opcode will produce a
                          // reference to a frame slot, so it is not affected
                          // by the arm64 dual stack issues mentioned below.
    case kArchComment:
    case kArchDeoptimize:
    case kArchJmp:
    case kArchBinarySearchSwitch:
    case kArchLookupSwitch:
    case kArchRet:
    case kArchTableSwitch:
    case kArchThrowTerminator:
      return kNoOpcodeFlags;

    case kArchTruncateDoubleToI:
    case kIeee754Float64Acos:
    case kIeee754Float64Acosh:
    case kIeee754Float64Asin:
    case kIeee754Float64Asinh:
    case kIeee754Float64Atan:
    case kIeee754Float64Atanh:
    case kIeee754Float64Atan2:
    case kIeee754Float64Cbrt:
    case kIeee754Float64Cos:
    case kIeee754Float64Cosh:
    case kIeee754Float64Exp:
    case kIeee754Float64Expm1:
    case kIeee754Float64Log:
    case kIeee754Float64Log1p:
    case kIeee754Float64Log10:
    case kIeee754Float64Log2:
    case kIeee754Float64Pow:
    case kIeee754Float64Sin:
    case kIeee754Float64Sinh:
    case kIeee754Float64Tan:
    case kIeee754Float64Tanh:
      return kNoOpcodeFlags;

    case kArchStackPointer:
      // The ArchStackPointer instruction loads the current stack pointer
      // value and must not be reordered with instructions that have side
      // effects.
      return kIsLoadOperation;

    case kArchWordPoisonOnSpeculation:
      // While poisoning operations have no side effect, they must not be
      // reordered relative to branches.
      return kHasSideEffect;

    case kArchPrepareCallCFunction:
    case kArchSaveCallerRegisters:
    case kArchRestoreCallerRegisters:
    case kArchPrepareTailCall:
    case kArchCallCFunction:
    case kArchCallCodeObject:
    case kArchCallJSFunction:
    case kArchCallWasmFunction:
    case kArchTailCallCodeObjectFromJSFunction:
    case kArchTailCallCodeObject:
    case kArchTailCallAddress:
    case kArchTailCallWasm:
    case kArchDebugAbort:
    case kArchDebugBreak:
      return kHasSideEffect;

    case kArchStoreWithWriteBarrier:
      return kHasSideEffect;

    case kWord32AtomicLoadInt8:
    case kWord32AtomicLoadUint8:
    case kWord32AtomicLoadInt16:
    case kWord32AtomicLoadUint16:
    case kWord32AtomicLoadWord32:
      return kIsLoadOperation;

    case kWord32AtomicStoreWord8:
    case kWord32AtomicStoreWord16:
    case kWord32AtomicStoreWord32:
      return kHasSideEffect;

    case kWord32AtomicExchangeInt8:
    case kWord32AtomicExchangeUint8:
    case kWord32AtomicExchangeInt16:
    case kWord32AtomicExchangeUint16:
    case kWord32AtomicExchangeWord32:
    case kWord32AtomicCompareExchangeInt8:
    case kWord32AtomicCompareExchangeUint8:
    case kWord32AtomicCompareExchangeInt16:
    case kWord32AtomicCompareExchangeUint16:
    case kWord32AtomicCompareExchangeWord32:
    case kWord32AtomicAddInt8:
    case kWord32AtomicAddUint8:
    case kWord32AtomicAddInt16:
    case kWord32AtomicAddUint16:
    case kWord32AtomicAddWord32:
    case kWord32AtomicSubInt8:
    case kWord32AtomicSubUint8:
    case kWord32AtomicSubInt16:
    case kWord32AtomicSubUint16:
    case kWord32AtomicSubWord32:
    case kWord32AtomicAndInt8:
    case kWord32AtomicAndUint8:
    case kWord32AtomicAndInt16:
    case kWord32AtomicAndUint16:
    case kWord32AtomicAndWord32:
    case kWord32AtomicOrInt8:
    case kWord32AtomicOrUint8:
    case kWord32AtomicOrInt16:
    case kWord32AtomicOrUint16:
    case kWord32AtomicOrWord32:
    case kWord32AtomicXorInt8:
    case kWord32AtomicXorUint8:
    case kWord32AtomicXorInt16:
    case kWord32AtomicXorUint16:
    case kWord32AtomicXorWord32:
      return kHasSideEffect;

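    // All remaining opcodes are architecture-specific; defer to the target
    // backend's classification.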
#define CASE(Name) case k##Name:
      TARGET_ARCH_OPCODE_LIST(CASE)
#undef CASE
      return GetTargetInstructionFlags(instr);
  }

  UNREACHABLE();
}

void InstructionScheduler::ComputeTotalLatencies() {
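  // Traverse the graph in reverse order: a node's successors can only appear
  // later in the graph, so their total latencies are already known when the
  // node itself is visited.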
  for (ScheduleGraphNode* node : base::Reversed(graph_)) {
    int max_latency = 0;

    for (ScheduleGraphNode* successor : node->successors()) {
      DCHECK_NE(-1, successor->total_latency());
      if (successor->total_latency() > max_latency) {
        max_latency = successor->total_latency();
      }
    }

    node->set_total_latency(max_latency + node->latency());
  }
}

}  // namespace compiler
}  // namespace internal
}  // namespace v8