// Copyright 2015 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/compiler/backend/instruction-scheduler.h"

#include "src/base/iterator.h"
#include "src/base/optional.h"
#include "src/base/utils/random-number-generator.h"

namespace v8 {
namespace internal {
namespace compiler {

void InstructionScheduler::SchedulingQueueBase::AddNode(
    ScheduleGraphNode* node) {
  // We keep the ready list sorted by total latency so that we can quickly find
  // the next best candidate to schedule.
  auto it = nodes_.begin();
  while ((it != nodes_.end()) &&
         ((*it)->total_latency() >= node->total_latency())) {
    ++it;
  }
  nodes_.insert(it, node);
}

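// The ready list is kept sorted by decreasing total latency (see AddNode
// above), so the first node whose start cycle has been reached is the ready
// node with the longest remaining latency chain, i.e. the one on the
// critical path.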
InstructionScheduler::ScheduleGraphNode*
InstructionScheduler::CriticalPathFirstQueue::PopBestCandidate(int cycle) {
  DCHECK(!IsEmpty());
  auto candidate = nodes_.end();
  for (auto iterator = nodes_.begin(); iterator != nodes_.end(); ++iterator) {
    // We only consider instructions that have all their operands ready.
    if (cycle >= (*iterator)->start_cycle()) {
      candidate = iterator;
      break;
    }
  }

  if (candidate != nodes_.end()) {
    ScheduleGraphNode* result = *candidate;
    nodes_.erase(candidate);
    return result;
  }

  return nullptr;
}

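// The stress scheduler ignores latencies entirely and picks a random ready
// node. Dependencies are still respected (only ready nodes are in the list),
// so this exercises arbitrary but valid instruction orders.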
InstructionScheduler::ScheduleGraphNode*
InstructionScheduler::StressSchedulerQueue::PopBestCandidate(int cycle) {
  DCHECK(!IsEmpty());
  // Choose a random element from the ready list.
  auto candidate = nodes_.begin();
  std::advance(candidate, random_number_generator()->NextInt(
                              static_cast<int>(nodes_.size())));
  ScheduleGraphNode* result = *candidate;
  nodes_.erase(candidate);
  return result;
}

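// total_latency_ and start_cycle_ start out as -1, meaning "not yet
// computed": total latencies are filled in by ComputeTotalLatencies(), and
// start_cycle_ is raised as predecessors get scheduled (see Schedule()).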
InstructionScheduler::ScheduleGraphNode::ScheduleGraphNode(Zone* zone,
                                                           Instruction* instr)
    : instr_(instr),
      successors_(zone),
      unscheduled_predecessors_count_(0),
      latency_(GetInstructionLatency(instr)),
      total_latency_(-1),
      start_cycle_(-1) {}

void InstructionScheduler::ScheduleGraphNode::AddSuccessor(
    ScheduleGraphNode* node) {
  successors_.push_back(node);
  node->unscheduled_predecessors_count_++;
}

InstructionScheduler::InstructionScheduler(Zone* zone,
                                           InstructionSequence* sequence)
    : zone_(zone),
      sequence_(sequence),
      graph_(zone),
      last_side_effect_instr_(nullptr),
      pending_loads_(zone),
      last_live_in_reg_marker_(nullptr),
      last_deopt_or_trap_(nullptr),
      operands_map_(zone) {
  if (FLAG_turbo_stress_instruction_scheduling) {
    random_number_generator_ =
        base::Optional<base::RandomNumberGenerator>(FLAG_random_seed);
  }
}

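// Typical usage, one basic block at a time (a sketch; the exact call sites
// live in the code generator, not in this file):
//
//   scheduler.StartBlock(rpo);
//   scheduler.AddInstruction(instr);      // for each body instruction
//   scheduler.AddTerminator(terminator);  // for the block terminator(s)
//   scheduler.EndBlock(rpo);              // runs Schedule() and emits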
void InstructionScheduler::StartBlock(RpoNumber rpo) {
  DCHECK(graph_.empty());
  DCHECK_NULL(last_side_effect_instr_);
  DCHECK(pending_loads_.empty());
  DCHECK_NULL(last_live_in_reg_marker_);
  DCHECK_NULL(last_deopt_or_trap_);
  DCHECK(operands_map_.empty());
  sequence()->StartBlock(rpo);
}

void InstructionScheduler::EndBlock(RpoNumber rpo) {
  if (FLAG_turbo_stress_instruction_scheduling) {
    Schedule<StressSchedulerQueue>();
  } else {
    Schedule<CriticalPathFirstQueue>();
  }
  sequence()->EndBlock(rpo);
}

void InstructionScheduler::AddTerminator(Instruction* instr) {
  ScheduleGraphNode* new_node = zone()->New<ScheduleGraphNode>(zone(), instr);
  // Make sure that basic block terminators are not moved by adding them
  // as a successor of every instruction.
  for (ScheduleGraphNode* node : graph_) {
    node->AddSuccessor(new_node);
  }
  graph_.push_back(new_node);
}

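// Builds the dependency graph incrementally, one instruction at a time:
// barriers flush the graph built so far, live-in register markers are chained
// at the start of the block, and edges are added so that side effects, loads,
// deopts/traps and operand def-use relations keep their required order.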
void InstructionScheduler::AddInstruction(Instruction* instr) {
  if (IsBarrier(instr)) {
    if (FLAG_turbo_stress_instruction_scheduling) {
      Schedule<StressSchedulerQueue>();
    } else {
      Schedule<CriticalPathFirstQueue>();
    }
    sequence()->AddInstruction(instr);
    return;
  }

  ScheduleGraphNode* new_node = zone()->New<ScheduleGraphNode>(zone(), instr);

  // We should not have branches in the middle of a block.
  DCHECK_NE(instr->flags_mode(), kFlags_branch);

  if (IsFixedRegisterParameter(instr)) {
    if (last_live_in_reg_marker_ != nullptr) {
      last_live_in_reg_marker_->AddSuccessor(new_node);
    }
    last_live_in_reg_marker_ = new_node;
  } else {
    if (last_live_in_reg_marker_ != nullptr) {
      last_live_in_reg_marker_->AddSuccessor(new_node);
    }

    // Make sure that instructions are not scheduled before the last
    // deoptimization or trap point when they depend on it.
    if ((last_deopt_or_trap_ != nullptr) && DependsOnDeoptOrTrap(instr)) {
      last_deopt_or_trap_->AddSuccessor(new_node);
    }

    // Instructions with side effects and memory operations can't be
    // reordered with respect to each other.
    if (HasSideEffect(instr)) {
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      for (ScheduleGraphNode* load : pending_loads_) {
        load->AddSuccessor(new_node);
      }
      pending_loads_.clear();
      last_side_effect_instr_ = new_node;
    } else if (IsLoadOperation(instr)) {
      // Load operations can't be reordered with side-effect instructions, but
      // independent loads can be reordered with respect to each other.
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      pending_loads_.push_back(new_node);
    } else if (instr->IsDeoptimizeCall() || CanTrap(instr)) {
      // Ensure that deopts or traps are not reordered with respect to
      // side-effect instructions.
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
    }

    // Update the last deoptimization or trap point.
    if (instr->IsDeoptimizeCall() || CanTrap(instr)) {
      last_deopt_or_trap_ = new_node;
    }

    // Look for operand dependencies.
    for (size_t i = 0; i < instr->InputCount(); ++i) {
      const InstructionOperand* input = instr->InputAt(i);
      if (input->IsUnallocated()) {
        int32_t vreg = UnallocatedOperand::cast(input)->virtual_register();
        auto it = operands_map_.find(vreg);
        if (it != operands_map_.end()) {
          it->second->AddSuccessor(new_node);
        }
      }
    }

    // Record the virtual registers defined by this instruction.
    for (size_t i = 0; i < instr->OutputCount(); ++i) {
      const InstructionOperand* output = instr->OutputAt(i);
      if (output->IsUnallocated()) {
        operands_map_[UnallocatedOperand::cast(output)->virtual_register()] =
            new_node;
      } else if (output->IsConstant()) {
        operands_map_[ConstantOperand::cast(output)->virtual_register()] =
            new_node;
      }
    }
  }

  graph_.push_back(new_node);
}

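// List scheduling: compute each node's total latency, seed the ready list
// with nodes that have no unscheduled predecessors, then repeatedly pop the
// queue's best candidate, emit it, and release its successors once all of
// their predecessors have been scheduled.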
template <typename QueueType>
void InstructionScheduler::Schedule() {
  QueueType ready_list(this);

  // Compute total latencies so that we can schedule the critical path first.
  ComputeTotalLatencies();

  // Add nodes which don't have dependencies to the ready list.
  for (ScheduleGraphNode* node : graph_) {
    if (!node->HasUnscheduledPredecessor()) {
      ready_list.AddNode(node);
    }
  }

  // Go through the ready list and schedule the instructions.
  int cycle = 0;
  while (!ready_list.IsEmpty()) {
    ScheduleGraphNode* candidate = ready_list.PopBestCandidate(cycle);

    if (candidate != nullptr) {
      sequence()->AddInstruction(candidate->instruction());

      for (ScheduleGraphNode* successor : candidate->successors()) {
        successor->DropUnscheduledPredecessor();
        successor->set_start_cycle(
            std::max(successor->start_cycle(), cycle + candidate->latency()));

        if (!successor->HasUnscheduledPredecessor()) {
          ready_list.AddNode(successor);
        }
      }
    }

    cycle++;
  }

  // Reset own state.
  graph_.clear();
  operands_map_.clear();
  pending_loads_.clear();
  last_deopt_or_trap_ = nullptr;
  last_live_in_reg_marker_ = nullptr;
  last_side_effect_instr_ = nullptr;
}

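// Classifies architecture-independent opcodes; anything target-specific is
// delegated to GetTargetInstructionFlags() at the end of the switch.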
int InstructionScheduler::GetInstructionFlags(const Instruction* instr) const {
  switch (instr->arch_opcode()) {
    case kArchNop:
    case kArchStackCheckOffset:
    case kArchFramePointer:
    case kArchParentFramePointer:
    case kArchStackSlot:  // Despite its name this opcode will produce a
                          // reference to a frame slot, so it is not affected
                          // by the arm64 dual stack issues mentioned below.
    case kArchComment:
    case kArchDeoptimize:
    case kArchJmp:
    case kArchBinarySearchSwitch:
    case kArchRet:
    case kArchTableSwitch:
    case kArchThrowTerminator:
      return kNoOpcodeFlags;

    case kArchTruncateDoubleToI:
    case kIeee754Float64Acos:
    case kIeee754Float64Acosh:
    case kIeee754Float64Asin:
    case kIeee754Float64Asinh:
    case kIeee754Float64Atan:
    case kIeee754Float64Atanh:
    case kIeee754Float64Atan2:
    case kIeee754Float64Cbrt:
    case kIeee754Float64Cos:
    case kIeee754Float64Cosh:
    case kIeee754Float64Exp:
    case kIeee754Float64Expm1:
    case kIeee754Float64Log:
    case kIeee754Float64Log1p:
    case kIeee754Float64Log10:
    case kIeee754Float64Log2:
    case kIeee754Float64Pow:
    case kIeee754Float64Sin:
    case kIeee754Float64Sinh:
    case kIeee754Float64Tan:
    case kIeee754Float64Tanh:
      return kNoOpcodeFlags;

    case kArchStackPointerGreaterThan:
      // The ArchStackPointerGreaterThan instruction loads the current stack
      // pointer value and must not be reordered with instructions with side
      // effects.
      return kIsLoadOperation;

    case kArchPrepareCallCFunction:
    case kArchPrepareTailCall:
    case kArchTailCallCodeObject:
    case kArchTailCallAddress:
#if V8_ENABLE_WEBASSEMBLY
    case kArchTailCallWasm:
#endif  // V8_ENABLE_WEBASSEMBLY
    case kArchAbortCSADcheck:
      return kHasSideEffect;

    case kArchDebugBreak:
      return kIsBarrier;

    case kArchSaveCallerRegisters:
    case kArchRestoreCallerRegisters:
      return kIsBarrier;

    case kArchCallCFunction:
    case kArchCallCodeObject:
    case kArchCallJSFunction:
#if V8_ENABLE_WEBASSEMBLY
    case kArchCallWasmFunction:
#endif  // V8_ENABLE_WEBASSEMBLY
    case kArchCallBuiltinPointer:
      // Calls can cause GC and GC may relocate objects. If a pure instruction
      // operates on a tagged pointer that was cast to a word then it may be
      // incorrect to move the instruction across the call. Hence we mark all
      // (non-tail-)calls as barriers.
      return kIsBarrier;

    case kArchStoreWithWriteBarrier:
    case kArchAtomicStoreWithWriteBarrier:
      return kHasSideEffect;

    case kAtomicLoadInt8:
    case kAtomicLoadUint8:
    case kAtomicLoadInt16:
    case kAtomicLoadUint16:
    case kAtomicLoadWord32:
      return kIsLoadOperation;

    case kAtomicStoreWord8:
    case kAtomicStoreWord16:
    case kAtomicStoreWord32:
      return kHasSideEffect;

    case kAtomicExchangeInt8:
    case kAtomicExchangeUint8:
    case kAtomicExchangeInt16:
    case kAtomicExchangeUint16:
    case kAtomicExchangeWord32:
    case kAtomicCompareExchangeInt8:
    case kAtomicCompareExchangeUint8:
    case kAtomicCompareExchangeInt16:
    case kAtomicCompareExchangeUint16:
    case kAtomicCompareExchangeWord32:
    case kAtomicAddInt8:
    case kAtomicAddUint8:
    case kAtomicAddInt16:
    case kAtomicAddUint16:
    case kAtomicAddWord32:
    case kAtomicSubInt8:
    case kAtomicSubUint8:
    case kAtomicSubInt16:
    case kAtomicSubUint16:
    case kAtomicSubWord32:
    case kAtomicAndInt8:
    case kAtomicAndUint8:
    case kAtomicAndInt16:
    case kAtomicAndUint16:
    case kAtomicAndWord32:
    case kAtomicOrInt8:
    case kAtomicOrUint8:
    case kAtomicOrInt16:
    case kAtomicOrUint16:
    case kAtomicOrWord32:
    case kAtomicXorInt8:
    case kAtomicXorUint8:
    case kAtomicXorInt16:
    case kAtomicXorUint16:
    case kAtomicXorWord32:
      return kHasSideEffect;

#define CASE(Name) case k##Name:
      TARGET_ARCH_OPCODE_LIST(CASE)
#undef CASE
      return GetTargetInstructionFlags(instr);
  }

  UNREACHABLE();
}

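// graph_ holds nodes in program order and dependency edges only point from
// earlier to later instructions, so walking the graph in reverse guarantees
// that every successor's total latency is already known (hence the DCHECK).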
void InstructionScheduler::ComputeTotalLatencies() {
  for (ScheduleGraphNode* node : base::Reversed(graph_)) {
    int max_latency = 0;

    for (ScheduleGraphNode* successor : node->successors()) {
      DCHECK_NE(-1, successor->total_latency());
      if (successor->total_latency() > max_latency) {
        max_latency = successor->total_latency();
      }
    }

    node->set_total_latency(max_latency + node->latency());
  }
}

}  // namespace compiler
}  // namespace internal
}  // namespace v8