// Copyright 2015 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/compiler/instruction-scheduler.h"

#include "src/base/adapters.h"
#include "src/base/utils/random-number-generator.h"

namespace v8 {
namespace internal {
namespace compiler {

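// Insert |node| into the ready list, which is kept ordered by decreasing
// total latency so that the most critical candidates sit near the front.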
void InstructionScheduler::SchedulingQueueBase::AddNode(
    ScheduleGraphNode* node) {
  // We keep the ready list sorted by total latency so that we can quickly find
  // the next best candidate to schedule.
  auto it = nodes_.begin();
  while ((it != nodes_.end()) &&
         ((*it)->total_latency() >= node->total_latency())) {
    ++it;
  }
  nodes_.insert(it, node);
}


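// Return the first node of the ready list whose operands are available at
// |cycle|. Because the list is ordered by decreasing total latency, this
// favors the instruction with the longest remaining path to the end of the
// block (critical path first). Returns nullptr if nothing is ready yet.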
InstructionScheduler::ScheduleGraphNode*
InstructionScheduler::CriticalPathFirstQueue::PopBestCandidate(int cycle) {
  DCHECK(!IsEmpty());
  auto candidate = nodes_.end();
  for (auto iterator = nodes_.begin(); iterator != nodes_.end(); ++iterator) {
    // We only consider instructions that have all their operands ready.
    if (cycle >= (*iterator)->start_cycle()) {
      candidate = iterator;
      break;
    }
  }

  if (candidate != nodes_.end()) {
    ScheduleGraphNode* result = *candidate;
    nodes_.erase(candidate);
    return result;
  }

  return nullptr;
}


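// Return a random node from the ready list. This queue is selected when
// FLAG_turbo_stress_instruction_scheduling is set, so that any
// dependency-respecting order can be produced.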
InstructionScheduler::ScheduleGraphNode*
InstructionScheduler::StressSchedulerQueue::PopBestCandidate(int cycle) {
  DCHECK(!IsEmpty());
  // Choose a random element from the ready list.
  auto candidate = nodes_.begin();
  std::advance(candidate, isolate()->random_number_generator()->NextInt(
                              static_cast<int>(nodes_.size())));
  ScheduleGraphNode* result = *candidate;
  nodes_.erase(candidate);
  return result;
}


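// total_latency_ and start_cycle_ start at -1 ("not computed yet"):
// ComputeTotalLatencies() fills in the former and ScheduleBlock() updates the
// latter as predecessors get scheduled.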
InstructionScheduler::ScheduleGraphNode::ScheduleGraphNode(
    Zone* zone,
    Instruction* instr)
    : instr_(instr),
      successors_(zone),
      unscheduled_predecessors_count_(0),
      latency_(GetInstructionLatency(instr)),
      total_latency_(-1),
      start_cycle_(-1) {
}


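// Record that |node| must be scheduled after this node. The successor's
// unscheduled predecessor count is what keeps it out of the ready list until
// all of its predecessors have been emitted.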
void InstructionScheduler::ScheduleGraphNode::AddSuccessor(
    ScheduleGraphNode* node) {
  successors_.push_back(node);
  node->unscheduled_predecessors_count_++;
}


InstructionScheduler::InstructionScheduler(Zone* zone,
                                           InstructionSequence* sequence)
    : zone_(zone),
      sequence_(sequence),
      graph_(zone),
      last_side_effect_instr_(nullptr),
      pending_loads_(zone),
      last_live_in_reg_marker_(nullptr),
      last_deopt_(nullptr),
      operands_map_(zone) {}


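// StartBlock() and EndBlock() bracket the instructions of one basic block:
// the dependency graph is built incrementally by AddInstruction() and the
// block is scheduled when EndBlock() is called.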
void InstructionScheduler::StartBlock(RpoNumber rpo) {
  DCHECK(graph_.empty());
  DCHECK(last_side_effect_instr_ == nullptr);
  DCHECK(pending_loads_.empty());
  DCHECK(last_live_in_reg_marker_ == nullptr);
  DCHECK(last_deopt_ == nullptr);
  DCHECK(operands_map_.empty());
  sequence()->StartBlock(rpo);
}


void InstructionScheduler::EndBlock(RpoNumber rpo) {
  if (FLAG_turbo_stress_instruction_scheduling) {
    ScheduleBlock<StressSchedulerQueue>();
  } else {
    ScheduleBlock<CriticalPathFirstQueue>();
  }
  sequence()->EndBlock(rpo);
  graph_.clear();
  last_side_effect_instr_ = nullptr;
  pending_loads_.clear();
  last_live_in_reg_marker_ = nullptr;
  last_deopt_ = nullptr;
  operands_map_.clear();
}


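// Add |instr| to the scheduling graph and create the dependency edges that
// keep the schedule semantically equivalent to the original order: block
// terminators come after every other instruction, side-effecting instructions
// and memory operations keep their relative order, instructions that depend
// on a deoptimization point stay after it, and uses of a virtual register
// stay after its definition.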
void InstructionScheduler::AddInstruction(Instruction* instr) {
  ScheduleGraphNode* new_node = new (zone()) ScheduleGraphNode(zone(), instr);

  if (IsBlockTerminator(instr)) {
    // Make sure that basic block terminators are not moved by adding them
    // as a successor of every instruction.
    for (ScheduleGraphNode* node : graph_) {
      node->AddSuccessor(new_node);
    }
  } else if (IsFixedRegisterParameter(instr)) {
    if (last_live_in_reg_marker_ != nullptr) {
      last_live_in_reg_marker_->AddSuccessor(new_node);
    }
    last_live_in_reg_marker_ = new_node;
  } else {
    if (last_live_in_reg_marker_ != nullptr) {
      last_live_in_reg_marker_->AddSuccessor(new_node);
    }

    // Make sure that instructions are not scheduled before the last
    // deoptimization point when they depend on it.
    if ((last_deopt_ != nullptr) && DependsOnDeoptimization(instr)) {
      last_deopt_->AddSuccessor(new_node);
    }

    // Instructions with side effects and memory operations can't be
    // reordered with respect to each other.
    if (HasSideEffect(instr)) {
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      for (ScheduleGraphNode* load : pending_loads_) {
        load->AddSuccessor(new_node);
      }
      pending_loads_.clear();
      last_side_effect_instr_ = new_node;
    } else if (IsLoadOperation(instr)) {
      // Load operations can't be reordered with respect to side-effecting
      // instructions, but independent loads can be reordered with respect to
      // each other.
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      pending_loads_.push_back(new_node);
    } else if (instr->IsDeoptimizeCall()) {
      // Ensure that deopts are not reordered with respect to side-effecting
      // instructions.
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      last_deopt_ = new_node;
    }

    // Look for operand dependencies.
    for (size_t i = 0; i < instr->InputCount(); ++i) {
      const InstructionOperand* input = instr->InputAt(i);
      if (input->IsUnallocated()) {
        int32_t vreg = UnallocatedOperand::cast(input)->virtual_register();
        auto it = operands_map_.find(vreg);
        if (it != operands_map_.end()) {
          it->second->AddSuccessor(new_node);
        }
      }
    }

    // Record the virtual registers defined by this instruction.
    for (size_t i = 0; i < instr->OutputCount(); ++i) {
      const InstructionOperand* output = instr->OutputAt(i);
      if (output->IsUnallocated()) {
        operands_map_[UnallocatedOperand::cast(output)->virtual_register()] =
            new_node;
      } else if (output->IsConstant()) {
        operands_map_[ConstantOperand::cast(output)->virtual_register()] =
            new_node;
      }
    }
  }

  graph_.push_back(new_node);
}


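// List scheduling: seed the ready list with nodes that have no predecessors,
// then repeatedly pop the best candidate for the current cycle, emit it, and
// release its successors once all of their predecessors have been scheduled.
// A cycle with no ready candidate simply advances the clock.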
template <typename QueueType>
void InstructionScheduler::ScheduleBlock() {
  QueueType ready_list(this);

  // Compute total latencies so that we can schedule the critical path first.
  ComputeTotalLatencies();

  // Add nodes which don't have dependencies to the ready list.
  for (ScheduleGraphNode* node : graph_) {
    if (!node->HasUnscheduledPredecessor()) {
      ready_list.AddNode(node);
    }
  }

  // Go through the ready list and schedule the instructions.
  int cycle = 0;
  while (!ready_list.IsEmpty()) {
    ScheduleGraphNode* candidate = ready_list.PopBestCandidate(cycle);

    if (candidate != nullptr) {
      sequence()->AddInstruction(candidate->instruction());

      for (ScheduleGraphNode* successor : candidate->successors()) {
        successor->DropUnscheduledPredecessor();
        successor->set_start_cycle(
            std::max(successor->start_cycle(), cycle + candidate->latency()));

        if (!successor->HasUnscheduledPredecessor()) {
          ready_list.AddNode(successor);
        }
      }
    }

    cycle++;
  }
}


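// Compute the scheduling flags of an architecture-independent opcode;
// target-specific opcodes are delegated to GetTargetInstructionFlags().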
int InstructionScheduler::GetInstructionFlags(const Instruction* instr) const {
  switch (instr->arch_opcode()) {
    case kArchNop:
    case kArchFramePointer:
    case kArchParentFramePointer:
    case kArchTruncateDoubleToI:
    case kArchStackSlot:
    case kArchDebugBreak:
    case kArchComment:
    case kIeee754Float64Acos:
    case kIeee754Float64Acosh:
    case kIeee754Float64Asin:
    case kIeee754Float64Asinh:
    case kIeee754Float64Atan:
    case kIeee754Float64Atanh:
    case kIeee754Float64Atan2:
    case kIeee754Float64Cbrt:
    case kIeee754Float64Cos:
    case kIeee754Float64Cosh:
    case kIeee754Float64Exp:
    case kIeee754Float64Expm1:
    case kIeee754Float64Log:
    case kIeee754Float64Log1p:
    case kIeee754Float64Log10:
    case kIeee754Float64Log2:
    case kIeee754Float64Pow:
    case kIeee754Float64Sin:
    case kIeee754Float64Sinh:
    case kIeee754Float64Tan:
    case kIeee754Float64Tanh:
      return kNoOpcodeFlags;

    case kArchStackPointer:
      // The ArchStackPointer instruction loads the current stack pointer
      // value and must not be reordered with respect to instructions with
      // side effects.
      return kIsLoadOperation;

    case kArchPrepareCallCFunction:
    case kArchPrepareTailCall:
    case kArchCallCFunction:
    case kArchCallCodeObject:
    case kArchCallJSFunction:
      return kHasSideEffect;

    case kArchTailCallCodeObjectFromJSFunction:
    case kArchTailCallCodeObject:
    case kArchTailCallJSFunctionFromJSFunction:
    case kArchTailCallAddress:
      return kHasSideEffect | kIsBlockTerminator;

    case kArchDeoptimize:
    case kArchJmp:
    case kArchLookupSwitch:
    case kArchTableSwitch:
    case kArchRet:
    case kArchThrowTerminator:
      return kIsBlockTerminator;

    case kCheckedLoadInt8:
    case kCheckedLoadUint8:
    case kCheckedLoadInt16:
    case kCheckedLoadUint16:
    case kCheckedLoadWord32:
    case kCheckedLoadWord64:
    case kCheckedLoadFloat32:
    case kCheckedLoadFloat64:
      return kIsLoadOperation;

    case kCheckedStoreWord8:
    case kCheckedStoreWord16:
    case kCheckedStoreWord32:
    case kCheckedStoreWord64:
    case kCheckedStoreFloat32:
    case kCheckedStoreFloat64:
    case kArchStoreWithWriteBarrier:
      return kHasSideEffect;

    case kAtomicLoadInt8:
    case kAtomicLoadUint8:
    case kAtomicLoadInt16:
    case kAtomicLoadUint16:
    case kAtomicLoadWord32:
      return kIsLoadOperation;

    case kAtomicStoreWord8:
    case kAtomicStoreWord16:
    case kAtomicStoreWord32:
      return kHasSideEffect;

#define CASE(Name) case k##Name:
    TARGET_ARCH_OPCODE_LIST(CASE)
#undef CASE
      return GetTargetInstructionFlags(instr);
  }

  UNREACHABLE();
  return kNoOpcodeFlags;
}


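// An instruction terminates its basic block either because its scheduling
// flags say so or because it is combined with a branch (kFlags_branch).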
bool InstructionScheduler::IsBlockTerminator(const Instruction* instr) const {
  return ((GetInstructionFlags(instr) & kIsBlockTerminator) ||
          (instr->flags_mode() == kFlags_branch));
}


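// Successors are always added to graph_ after their predecessors, so walking
// the graph in reverse guarantees that every successor's total latency is
// already known when a node is visited: a node's total latency is its own
// latency plus the largest total latency among its successors.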
void InstructionScheduler::ComputeTotalLatencies() {
  for (ScheduleGraphNode* node : base::Reversed(graph_)) {
    int max_latency = 0;

    for (ScheduleGraphNode* successor : node->successors()) {
      DCHECK(successor->total_latency() != -1);
      if (successor->total_latency() > max_latency) {
        max_latency = successor->total_latency();
      }
    }

    node->set_total_latency(max_latency + node->latency());
  }
}

}  // namespace compiler
}  // namespace internal
}  // namespace v8